about | summary | refs | log | tree | commit | diff
path: root/posix
diff options
context:
space:
mode:
Diffstat (limited to 'posix')
-rw-r--r-- posix/regex.c | 118
-rw-r--r-- posix/regex.h | 8
2 files changed, 53 insertions, 73 deletions
diff --git a/posix/regex.c b/posix/regex.c
index fc25bb0..4c90a4f 100644
--- a/posix/regex.c
+++ b/posix/regex.c
@@ -2138,21 +2138,21 @@ typedef struct
/* Get the next unsigned number in the uncompiled pattern. */
-#define GET_UNSIGNED_NUMBER(num) \
- { if (p != pend) \
- { \
- PATFETCH (c); \
- while ('0' <= c && c <= '9') \
- { \
- if (num < 0) \
- num = 0; \
- num = num * 10 + c - '0'; \
- if (p == pend) \
- break; \
- PATFETCH (c); \
- } \
- } \
- }
+#define GET_UNSIGNED_NUMBER(num) \
+ { \
+ while (p != pend) \
+ { \
+ PATFETCH (c); \
+ if (c < '0' || c > '9') \
+ break; \
+ if (num <= RE_DUP_MAX) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ } \
+ } \
+ }
#if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
@@ -2326,14 +2326,6 @@ regex_compile (pattern, size, syntax, bufp)
/* Address of beginning of regexp, or inside of last group. */
US_CHAR_TYPE *begalt;
- /* Place in the uncompiled pattern (i.e., the {) to
- which to go back if the interval is invalid. */
-#ifdef MBS_SUPPORT
- const US_CHAR_TYPE *beg_interval;
-#else
- const char *beg_interval;
-#endif /* MBS_SUPPORT */
-
/* Address of the place where a forward jump should go to the end of
the containing expression. Each alternative of an `or' -- except the
last -- ends with a forward jump of this sort. */
@@ -3827,25 +3819,19 @@ regex_compile (pattern, size, syntax, bufp)
/* At least (most) this many matches must be made. */
int lower_bound = -1, upper_bound = -1;
- beg_interval = p - 1;
+
+ /* Place in the uncompiled pattern (i.e., just after
+ the '{') to go back to if the interval is invalid. */
+ const CHAR_TYPE *beg_interval = p;
if (p == pend)
- {
- if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- goto unfetch_interval;
- else
- FREE_STACK_RETURN (REG_EBRACE);
- }
+ goto invalid_interval;
GET_UNSIGNED_NUMBER (lower_bound);
if (c == ',')
{
GET_UNSIGNED_NUMBER (upper_bound);
- if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
- || ((syntax & RE_NO_BK_BRACES) && c != '}'))
- FREE_STACK_RETURN (REG_BADBR);
-
if (upper_bound < 0)
upper_bound = RE_DUP_MAX;
}
@@ -3853,36 +3839,24 @@ regex_compile (pattern, size, syntax, bufp)
/* Interval such as `{1}' => match exactly once. */
upper_bound = lower_bound;
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound)
- {
- if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- goto unfetch_interval;
- else
- FREE_STACK_RETURN (REG_BADBR);
- }
+ if (! (0 <= lower_bound && lower_bound <= upper_bound))
+ goto invalid_interval;
if (!(syntax & RE_NO_BK_BRACES))
{
- if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
-
+ if (c != '\\' || p == pend)
+ goto invalid_interval;
PATFETCH (c);
}
if (c != '}')
- {
- if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- goto unfetch_interval;
- else
- FREE_STACK_RETURN (REG_BADBR);
- }
-
- /* We just parsed a valid interval. */
+ goto invalid_interval;
/* If it's invalid to have no preceding re. */
if (!laststart)
{
- if (syntax & RE_CONTEXT_INVALID_OPS)
+ if (syntax & RE_CONTEXT_INVALID_OPS
+ && !(syntax & RE_INVALID_INTERVAL_ORD))
FREE_STACK_RETURN (REG_BADRPT);
else if (syntax & RE_CONTEXT_INDEP_OPS)
laststart = b;
@@ -3890,6 +3864,11 @@ regex_compile (pattern, size, syntax, bufp)
goto unfetch_interval;
}
+ /* We just parsed a valid interval. */
+
+ if (RE_DUP_MAX < upper_bound)
+ FREE_STACK_RETURN (REG_BADBR);
+
/* If the upper bound is zero, don't want to succeed at
all; jump from `laststart' to `b + 3', which will be
the end of the buffer after we insert the jump. */
@@ -3975,25 +3954,20 @@ regex_compile (pattern, size, syntax, bufp)
}
}
pending_exact = 0;
- beg_interval = NULL;
- }
- break;
-
- unfetch_interval:
- /* If an invalid interval, match the characters as literals. */
- assert (beg_interval);
- p = beg_interval;
- beg_interval = NULL;
-
- /* normal_char and normal_backslash need `c'. */
- PATFETCH (c);
-
- if (!(syntax & RE_NO_BK_BRACES))
- {
- if (p > pattern && p[-1] == '\\')
- goto normal_backslash;
- }
- goto normal_char;
+ break;
+
+ invalid_interval:
+ if (!(syntax & RE_INVALID_INTERVAL_ORD))
+ FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
+ unfetch_interval:
+ /* Match the characters as literals. */
+ p = beg_interval;
+ c = '{';
+ if (syntax & RE_NO_BK_BRACES)
+ goto normal_char;
+ else
+ goto normal_backslash;
+ }
#ifdef emacs
/* There is no way to specify the before_dot and after_dot
diff --git a/posix/regex.h b/posix/regex.h
index 91a3560..63c2fef 100644
--- a/posix/regex.h
+++ b/posix/regex.h
@@ -160,6 +160,11 @@ typedef unsigned long int reg_syntax_t;
this bit set, and it won't affect anything in the normal case. */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
@@ -199,7 +204,8 @@ extern reg_syntax_t re_syntax_options;
| RE_NO_BK_VBAR)
#define RE_SYNTAX_POSIX_EGREP \
- (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC