diff options
Diffstat (limited to 'posix')
-rw-r--r-- | posix/TESTS | 8 | ||||
-rw-r--r-- | posix/getopt.c | 3 | ||||
-rw-r--r-- | posix/getopt.h | 4 | ||||
-rw-r--r-- | posix/getopt1.c | 4 | ||||
-rw-r--r-- | posix/regex.c | 96 | ||||
-rw-r--r-- | posix/regex.h | 21 |
6 files changed, 108 insertions, 28 deletions
diff --git a/posix/TESTS b/posix/TESTS index 2bb7c93..4f1c49f 100644 --- a/posix/TESTS +++ b/posix/TESTS @@ -157,3 +157,11 @@ 0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moammar Qudhafi 0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar al-Qaddafi 0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi +0:[[:digit:]]+:01234 +1:[[:alpha:]]+:01234 +0:^[[:digit:]]*$:01234 +1:^[[:digit:]]*$:01234a +0:^[[:alnum:]]*$:01234a +0:^[[:xdigit:]]*$:01234a +1:^[[:xdigit:]]*$:01234g +0:^[[:alnum:][:space:]]*$:Hello world diff --git a/posix/getopt.c b/posix/getopt.c index 4cbefa1..59b51cd 100644 --- a/posix/getopt.c +++ b/posix/getopt.c @@ -6,9 +6,6 @@ Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc. - This file is part of the GNU C Library. Its master source is NOT part of - the C library, however. The master source lives in /gd/gnu/lib. - The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the diff --git a/posix/getopt.h b/posix/getopt.h index 7dad11b..d6ceb0e 100644 --- a/posix/getopt.h +++ b/posix/getopt.h @@ -1,8 +1,6 @@ /* Declarations for getopt. Copyright (C) 1989,90,91,92,93,94,96,97 Free Software Foundation, Inc. - - This file is part of the GNU C Library. Its master source is NOT part of - the C library, however. The master source lives in /gd/gnu/lib. + This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as diff --git a/posix/getopt1.c b/posix/getopt1.c index 8347bb1..4aa8de6 100644 --- a/posix/getopt1.c +++ b/posix/getopt1.c @@ -1,8 +1,6 @@ /* getopt_long and getopt_long_only entry points for GNU getopt. 
Copyright (C) 1987,88,89,90,91,92,93,94,96,97 Free Software Foundation, Inc. - - This file is part of the GNU C Library. Its master source is NOT part of - the C library, however. The master source lives in /gd/gnu/lib. + This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as diff --git a/posix/regex.c b/posix/regex.c index b7c82f6..fc4db38 100644 --- a/posix/regex.c +++ b/posix/regex.c @@ -1,6 +1,6 @@ /* Extended regular expression matching and search library, version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for + (Implements POSIX draft P1003.2/D11.2, except for some of the internationalization features.) Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. @@ -42,6 +42,13 @@ #include <sys/types.h> #endif +/* For platforms which support the ISO C amendment 1 functionality we + support user defined character classes. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +# include <wctype.h> +# include <wchar.h> +#endif + /* This is for other GNU distributions with internationalized messages. */ #if HAVE_LIBINTL_H || defined (_LIBC) # include <libintl.h> @@ -946,6 +953,12 @@ re_set_syntax (syntax) reg_syntax_t ret = re_syntax_options; re_syntax_options = syntax; +#ifdef DEBUG + if (syntax & RE_DEBUG) + debug = 1; + else if (debug) /* was on but now is not */ + debug = 0; +#endif /* DEBUG */ return ret; } @@ -1026,22 +1039,24 @@ static const char *re_error_msgid[] = #endif /* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. + exactly that if always used MAX_FAILURE_ITEMS items each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. 
*/ #ifdef INT_IS_16BIT #if defined (MATCH_MAY_ALLOCATE) -long re_max_failures = 4000; +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. */ +long int re_max_failures = 4000; #else -long re_max_failures = 2000; +long int re_max_failures = 2000; #endif union fail_stack_elt { unsigned char *pointer; - long integer; + long int integer; }; typedef union fail_stack_elt fail_stack_elt_t; @@ -1049,8 +1064,8 @@ typedef union fail_stack_elt fail_stack_elt_t; typedef struct { fail_stack_elt_t *stack; - unsigned long size; - unsigned long avail; /* Offset of next open position. */ + unsigned long int size; + unsigned long int avail; /* Offset of next open position. */ } fail_stack_type; #else /* not INT_IS_16BIT */ @@ -1058,7 +1073,7 @@ typedef struct #if defined (MATCH_MAY_ALLOCATE) /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ -int re_max_failures = 4000; +int re_max_failures = 20000; #else int re_max_failures = 2000; #endif @@ -1661,15 +1676,29 @@ typedef struct } \ } -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendment 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this + problem. Use a reasonable default value. */ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# define IS_CHAR_CLASS(string) wctype (string) +#else +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. 
*/ -#define IS_CHAR_CLASS(string) \ +# define IS_CHAR_CLASS(string) \ (STREQ (string, "alpha") || STREQ (string, "upper") \ || STREQ (string, "lower") || STREQ (string, "digit") \ || STREQ (string, "alnum") || STREQ (string, "xdigit") \ || STREQ (string, "space") || STREQ (string, "print") \ || STREQ (string, "punct") || STREQ (string, "graph") \ || STREQ (string, "cntrl") || STREQ (string, "blank")) +#endif #ifndef MATCH_MAY_ALLOCATE @@ -2147,6 +2176,34 @@ regex_compile (pattern, size, syntax, bufp) the leading `:' and `[' (but set bits for them). */ if (c == ':' && *p == ']') { +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + boolean is_lower = STREQ (str, "lower"); + boolean is_upper = STREQ (str, "upper"); + wctype_t wt; + int ch; + + wt = wctype (str); + if (wt == 0) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) + { + if (iswctype (btowc (ch), wt)) + SET_LIST_BIT (ch); + + if (translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); + } + + had_char_class = true; +#else int ch; boolean is_alnum = STREQ (str, "alnum"); boolean is_alpha = STREQ (str, "alpha"); @@ -2194,6 +2251,7 @@ regex_compile (pattern, size, syntax, bufp) SET_LIST_BIT (ch); } had_char_class = true; +#endif /* libc || wctype.h */ } else { @@ -3551,12 +3609,14 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ == Sword) +/* Disabled due to a compiler bug -- see comment at case wordbound */ +#if 0 /* Test if the character before D and the one at D differ with respect to being word-constituent. */ #define AT_WORD_BOUNDARY(d) \ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - +#endif /* Free everything we malloc. 
*/ #ifdef MATCH_MAY_ALLOCATE @@ -4725,6 +4785,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) dummy_low_reg, dummy_high_reg, reg_dummy, reg_dummy, reg_info_dummy); } + /* Note fall through. */ unconditional_jump: #ifdef _LIBC @@ -5355,7 +5416,13 @@ re_compile_pattern (pattern, length, bufp) /* BSD has one and only one pattern buffer. */ static struct re_pattern_buffer re_comp_buf; -char * weak_function +char * +#ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec below without link errors. */ +weak_function +#endif re_comp (s) const char *s; { @@ -5396,7 +5463,10 @@ re_comp (s) } -int weak_function +int +#ifdef _LIBC +weak_function +#endif re_exec (s) const char *s; { diff --git a/posix/regex.h b/posix/regex.h index ae0165e..8e2bd8f 100644 --- a/posix/regex.h +++ b/posix/regex.h @@ -152,6 +152,14 @@ typedef unsigned long int reg_syntax_t; If not set, then the GNU regex operators are recognized. */ #define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +#define RE_DEBUG (RE_NO_GNU_OPS << 1) + /* This global variable defines the particular regexp syntax to use (for some interfaces). 
When a regexp is compiled, the syntax used is stored in the pattern buffer, so changing this does not affect @@ -168,15 +176,16 @@ extern reg_syntax_t re_syntax_options; (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_DOT_NEWLINE \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) #define RE_SYNTAX_GNU_AWK \ - ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) \ - & ~(RE_DOT_NOT_NULL | RE_INTERVALS)) + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS)) #define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_NO_GNU_OPS) + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) #define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ @@ -316,10 +325,10 @@ struct re_pattern_buffer unsigned char *buffer; /* Number of bytes to which `buffer' points. */ - unsigned long allocated; + unsigned long int allocated; /* Number of bytes actually used in `buffer'. */ - unsigned long used; + unsigned long int used; /* Syntax setting with which the pattern was compiled. */ reg_syntax_t syntax; |