aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog13
-rw-r--r--posix/Makefile3
-rw-r--r--posix/bug-regex24.c59
-rw-r--r--posix/regex_internal.h6
-rw-r--r--posix/regexec.c25
5 files changed, 99 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index b02b433..d883905 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2004-11-12 Ulrich Drepper <drepper@redhat.com>
+
+ * posix/Makefile (tests): Add bug-regex24.
+ * posix/bug-regex24.c: New file.
+
+2004-11-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * posix/regexec.c (check_dst_limits_calc_pos_1): Use the map to
+ cut recursive paths. Make exit condition more precise.
+ (match_ctx_add_entry): Initialize the map.
+ * posix/regex_internal.h (struct re_backref_cache_entry): Add a map of
+ reachable subexpression nodes from each backreference cache entry.
+
2004-11-10 Jakub Jelinek <jakub@redhat.com>
* sysdeps/unix/sysv/linux/setreuid.c: Remove sys/syscall.h,
diff --git a/posix/Makefile b/posix/Makefile
index cd6a52c..8bc15ad 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -79,7 +79,8 @@ tests := tstgetopt testfnm runtests runptests \
bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \
bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
- bug-regex21 bug-regex22 bug-regex23 tst-nice tst-nanosleep \
+ bug-regex21 bug-regex22 bug-regex23 bug-regex24 \
+ tst-nice tst-nanosleep \
transbug tst-rxspencer tst-pcre tst-boost \
bug-ga1 tst-vfork1 tst-vfork2 tst-waitid \
tst-getaddrinfo2 bug-glob1 bug-glob2
diff --git a/posix/bug-regex24.c b/posix/bug-regex24.c
new file mode 100644
index 0000000..83ea10b
--- /dev/null
+++ b/posix/bug-regex24.c
@@ -0,0 +1,59 @@
+#include <regex.h>
+#include <stdio.h>
+
+#define str "civic"
+
+#define N 10
+static const char *expected[N] =
+ {
+ str, "c", "i", "", "", "", "", "", "", ""
+ };
+
+static int
+do_test (void)
+{
+ regex_t rbuf;
+ static const char pat[] = "\
+^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$";
+
+ int err = regcomp (&rbuf, pat, REG_EXTENDED);
+ if (err != 0)
+ {
+ char errstr[300];
+ regerror (err, &rbuf, errstr, sizeof (errstr));
+ puts (errstr);
+ return err;
+ }
+
+ regmatch_t m[N];
+ err = regexec (&rbuf, str, N, m, 0);
+ if (err != 0)
+ {
+ puts ("regexec failed");
+ return 1;
+ }
+
+ int result = 0;
+ for (int i = 0; i < N; ++i)
+ if (m[i].rm_so == -1)
+ {
+ printf ("m[%d] unused\n", i);
+ result = 1;
+ }
+ else
+ {
+ int len = m[i].rm_eo - m[i].rm_so;
+
+ printf ("m[%d] = \"%.*s\"\n", i, len, str + m[i].rm_so);
+
+ if (strlen (expected[i]) != len
+ || memcmp (expected[i], str + m[i].rm_so, len) != 0)
+ result = 1;
+ }
+
+ return result;
+}
+
+#define TIMEOUT 30
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 023056c..14d95a5 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -548,9 +548,9 @@ struct re_backref_cache_entry
int str_idx;
int subexp_from;
int subexp_to;
- /* We need only one byte from the following field. If other small
- fields are added the type could be changed to 'char'. */
- int more;
+ char more;
+ char unused;
+ unsigned short int eps_reachable_subexps_map;
};
typedef struct
diff --git a/posix/regexec.c b/posix/regexec.c
index 7910411..a03df26 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -1885,7 +1885,12 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
{
int dst, cpos;
- if (ent->node != node || ent->subexp_from != ent->subexp_to)
+ if (ent->node != node)
+ continue;
+
+ if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
+ && (ent->eps_reachable_subexps_map
+ & (1 << (subexp_idx - 1))) == 0)
continue;
/* Recurse trying to reach the OP_OPEN_SUBEXP and
@@ -1906,11 +1911,13 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
cpos = check_dst_limits_calc_pos_1 (mctx, boundaries,
subexp_idx, dst, bkref_idx);
- if (cpos == -1 && (boundaries & 1))
+ if (cpos == -1 /* && (boundaries & 1) */)
return -1;
- if (cpos == 0 /* && (boundaries & 2) */)
+ if (cpos == 0 && (boundaries & 2))
return 0;
+
+ ent->eps_reachable_subexps_map &= ~(1 << (subexp_idx - 1));
}
while (ent++->more);
break;
@@ -4169,6 +4176,18 @@ match_ctx_add_entry (mctx, node, str_idx, from, to)
mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+
+ /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+ If bit N is clear, means that this entry won't epsilon-transition to
+ an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
+ it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+ such node.
+
+ A backreference does not epsilon-transition unless it is empty, so set
+ to all zeros if FROM != TO. */
+ mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
+ = (from == to ? ~0 : 0);
+
mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
if (mctx->max_mb_elem_len < to - from)
mctx->max_mb_elem_len = to - from;