aboutsummaryrefslogtreecommitdiff
path: root/posix
diff options
context:
space:
mode:
Diffstat (limited to 'posix')
-rw-r--r--posix/TESTS8
-rw-r--r--posix/getopt.c3
-rw-r--r--posix/getopt.h4
-rw-r--r--posix/getopt1.c4
-rw-r--r--posix/regex.c96
-rw-r--r--posix/regex.h21
6 files changed, 108 insertions, 28 deletions
diff --git a/posix/TESTS b/posix/TESTS
index 2bb7c93..4f1c49f 100644
--- a/posix/TESTS
+++ b/posix/TESTS
@@ -157,3 +157,11 @@
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moammar Qudhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar al-Qaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi
+0:[[:digit:]]+:01234
+1:[[:alpha:]]+:01234
+0:^[[:digit:]]*$:01234
+1:^[[:digit:]]*$:01234a
+0:^[[:alnum:]]*$:01234a
+0:^[[:xdigit:]]*$:01234a
+1:^[[:xdigit:]]*$:01234g
+0:^[[:alnum:][:space:]]*$:Hello world
diff --git a/posix/getopt.c b/posix/getopt.c
index 4cbefa1..59b51cd 100644
--- a/posix/getopt.c
+++ b/posix/getopt.c
@@ -6,9 +6,6 @@
Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97
Free Software Foundation, Inc.
- This file is part of the GNU C Library. Its master source is NOT part of
- the C library, however. The master source lives in /gd/gnu/lib.
-
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
diff --git a/posix/getopt.h b/posix/getopt.h
index 7dad11b..d6ceb0e 100644
--- a/posix/getopt.h
+++ b/posix/getopt.h
@@ -1,8 +1,6 @@
/* Declarations for getopt.
Copyright (C) 1989,90,91,92,93,94,96,97 Free Software Foundation, Inc.
-
- This file is part of the GNU C Library. Its master source is NOT part of
- the C library, however. The master source lives in /gd/gnu/lib.
+ This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
diff --git a/posix/getopt1.c b/posix/getopt1.c
index 8347bb1..4aa8de6 100644
--- a/posix/getopt1.c
+++ b/posix/getopt1.c
@@ -1,8 +1,6 @@
/* getopt_long and getopt_long_only entry points for GNU getopt.
Copyright (C) 1987,88,89,90,91,92,93,94,96,97 Free Software Foundation, Inc.
-
- This file is part of the GNU C Library. Its master source is NOT part of
- the C library, however. The master source lives in /gd/gnu/lib.
+ This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
diff --git a/posix/regex.c b/posix/regex.c
index b7c82f6..fc4db38 100644
--- a/posix/regex.c
+++ b/posix/regex.c
@@ -1,6 +1,6 @@
/* Extended regular expression matching and search library,
version 0.12.
- (Implements POSIX draft P10003.2/D11.2, except for
+ (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
@@ -42,6 +42,13 @@
#include <sys/types.h>
#endif
+/* For platform which support the ISO C amendement 1 functionality we
+ support user defined character classes. */
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+# include <wctype.h>
+# include <wchar.h>
+#endif
+
/* This is for other GNU distributions with internationalized messages. */
#if HAVE_LIBINTL_H || defined (_LIBC)
# include <libintl.h>
@@ -946,6 +953,12 @@ re_set_syntax (syntax)
reg_syntax_t ret = re_syntax_options;
re_syntax_options = syntax;
+#ifdef DEBUG
+ if (syntax & RE_DEBUG)
+ debug = 1;
+ else if (debug) /* was on but now is not */
+ debug = 0;
+#endif /* DEBUG */
return ret;
}
@@ -1026,22 +1039,24 @@ static const char *re_error_msgid[] =
#endif
/* Roughly the maximum number of failure points on the stack. Would be
- exactly that if always used MAX_FAILURE_SPACE each time we failed.
+ exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
This is a variable only so users of regex can assign to it; we never
change it ourselves. */
#ifdef INT_IS_16BIT
#if defined (MATCH_MAY_ALLOCATE)
-long re_max_failures = 4000;
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb. */
+long int re_max_failures = 4000;
#else
-long re_max_failures = 2000;
+long int re_max_failures = 2000;
#endif
union fail_stack_elt
{
unsigned char *pointer;
- long integer;
+ long int integer;
};
typedef union fail_stack_elt fail_stack_elt_t;
@@ -1049,8 +1064,8 @@ typedef union fail_stack_elt fail_stack_elt_t;
typedef struct
{
fail_stack_elt_t *stack;
- unsigned long size;
- unsigned long avail; /* Offset of next open position. */
+ unsigned long int size;
+ unsigned long int avail; /* Offset of next open position. */
} fail_stack_type;
#else /* not INT_IS_16BIT */
@@ -1058,7 +1073,7 @@ typedef struct
#if defined (MATCH_MAY_ALLOCATE)
/* 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. */
-int re_max_failures = 4000;
+int re_max_failures = 20000;
#else
int re_max_failures = 2000;
#endif
@@ -1661,15 +1676,29 @@ typedef struct
} \
}
-#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+/* The GNU C library provides support for user-defined character classes
+ and the functions from ISO C amendement 1. */
+# ifdef CHARCLASS_NAME_MAX
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+ problem. Use a reasonable default value. */
+# define CHAR_CLASS_MAX_LENGTH 256
+# endif
+
+# define IS_CHAR_CLASS(string) wctype (string)
+#else
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
-#define IS_CHAR_CLASS(string) \
+# define IS_CHAR_CLASS(string) \
(STREQ (string, "alpha") || STREQ (string, "upper") \
|| STREQ (string, "lower") || STREQ (string, "digit") \
|| STREQ (string, "alnum") || STREQ (string, "xdigit") \
|| STREQ (string, "space") || STREQ (string, "print") \
|| STREQ (string, "punct") || STREQ (string, "graph") \
|| STREQ (string, "cntrl") || STREQ (string, "blank"))
+#endif
#ifndef MATCH_MAY_ALLOCATE
@@ -2147,6 +2176,34 @@ regex_compile (pattern, size, syntax, bufp)
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_upper = STREQ (str, "upper");
+ wctype_t wt;
+ int ch;
+
+ wt = wctype (str);
+ if (wt == 0)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+ {
+ if (iswctype (btowc (ch), wt))
+ SET_LIST_BIT (ch);
+
+ if (translate && (is_upper || is_lower)
+ && (ISUPPER (ch) || ISLOWER (ch)))
+ SET_LIST_BIT (ch);
+ }
+
+ had_char_class = true;
+#else
int ch;
boolean is_alnum = STREQ (str, "alnum");
boolean is_alpha = STREQ (str, "alpha");
@@ -2194,6 +2251,7 @@ regex_compile (pattern, size, syntax, bufp)
SET_LIST_BIT (ch);
}
had_char_class = true;
+#endif /* libc || wctype.h */
}
else
{
@@ -3551,12 +3609,14 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
: (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
== Sword)
+/* Disabled due to a compiler bug -- see comment at case wordbound */
+#if 0
/* Test if the character before D and the one at D differ with respect
to being word-constituent. */
#define AT_WORD_BOUNDARY(d) \
(AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
|| WORDCHAR_P (d - 1) != WORDCHAR_P (d))
-
+#endif
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
@@ -4725,6 +4785,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
dummy_low_reg, dummy_high_reg,
reg_dummy, reg_dummy, reg_info_dummy);
}
+ /* Note fall through. */
unconditional_jump:
#ifdef _LIBC
@@ -5355,7 +5416,13 @@ re_compile_pattern (pattern, length, bufp)
/* BSD has one and only one pattern buffer. */
static struct re_pattern_buffer re_comp_buf;
-char * weak_function
+char *
+#ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+ these names if they don't use our functions, and still use
+ regcomp/regexec below without link errors. */
+weak_function
+#endif
re_comp (s)
const char *s;
{
@@ -5396,7 +5463,10 @@ re_comp (s)
}
-int weak_function
+int
+#ifdef _LIBC
+weak_function
+#endif
re_exec (s)
const char *s;
{
diff --git a/posix/regex.h b/posix/regex.h
index ae0165e..8e2bd8f 100644
--- a/posix/regex.h
+++ b/posix/regex.h
@@ -152,6 +152,14 @@ typedef unsigned long int reg_syntax_t;
If not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
@@ -168,15 +176,16 @@ extern reg_syntax_t re_syntax_options;
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
| RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_DOT_NEWLINE \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
| RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
#define RE_SYNTAX_GNU_AWK \
- ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) \
- & ~(RE_DOT_NOT_NULL | RE_INTERVALS))
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
#define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_NO_GNU_OPS)
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
#define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
@@ -316,10 +325,10 @@ struct re_pattern_buffer
unsigned char *buffer;
/* Number of bytes to which `buffer' points. */
- unsigned long allocated;
+ unsigned long int allocated;
/* Number of bytes actually used in `buffer'. */
- unsigned long used;
+ unsigned long int used;
/* Syntax setting with which the pattern was compiled. */
reg_syntax_t syntax;