aboutsummaryrefslogtreecommitdiff
path: root/posix
diff options
context:
space:
mode:
Diffstat (limited to 'posix')
-rw-r--r--posix/Makefile11
-rw-r--r--posix/TESTS159
-rw-r--r--posix/TESTS2C.sed2
-rw-r--r--posix/regex.c204
-rw-r--r--posix/regex.h37
-rw-r--r--posix/runtests.c132
-rw-r--r--posix/testcases.h159
7 files changed, 651 insertions, 53 deletions
diff --git a/posix/Makefile b/posix/Makefile
index d4ae754..83509f6 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -27,7 +27,7 @@ headers := sys/utsname.h sys/times.h sys/wait.h sys/types.h unistd.h \
utsnamelen.h confname.h waitflags.h waitstatus.h sys/unistd.h \
sched.h schedbits.h re_comp.h wait.h
-distribute := confstr.h
+distribute := confstr.h TESTS TESTS2C.sed testcases.h
routines := \
uname \
@@ -49,12 +49,14 @@ routines := \
getaddrinfo gai_strerror
aux := init-posix environ
-tests := tstgetopt testfnm
+tests := tstgetopt testfnm runtests
others := getconf
install-bin := getconf
install-lib := libposix.a
gpl2lgpl := getopt.c getopt1.c getopt.h regex.c regex.h
+before-compile := testcases.h
+
include ../Rules
CFLAGS-regex.c = -Wno-unused -Wno-strict-prototypes
@@ -62,6 +64,11 @@ CFLAGS-getaddrinfo.c = -DRESOLVER
$(objpfx)libposix.a: $(dep-dummy-lib); $(make-dummy-lib)
lib: $(objpfx)libposix.a
+
+testcases.h: TESTS TESTS2C.sed
+ sed -f TESTS2C.sed < $< > $@T
+ mv -f $@T $@
+ test ! -d CVS || cvs commit -mRegenerated $@
# Make the standalone glob/fnmatch package.
diff --git a/posix/TESTS b/posix/TESTS
new file mode 100644
index 0000000..2bb7c93
--- /dev/null
+++ b/posix/TESTS
@@ -0,0 +1,159 @@
+0:(.*)*\1:xx
+0:^:
+0:$:
+0:^$:
+0:^a$:a
+0:abc:abc
+1:abc:xbc
+1:abc:axc
+1:abc:abx
+0:abc:xabcy
+0:abc:ababc
+0:ab*c:abc
+0:ab*bc:abc
+0:ab*bc:abbc
+0:ab*bc:abbbbc
+0:ab+bc:abbc
+1:ab+bc:abc
+1:ab+bc:abq
+0:ab+bc:abbbbc
+0:ab?bc:abbc
+0:ab?bc:abc
+1:ab?bc:abbbbc
+0:ab?c:abc
+0:^abc$:abc
+1:^abc$:abcc
+0:^abc:abcc
+1:^abc$:aabc
+0:abc$:aabc
+0:^:abc
+0:$:abc
+0:a.c:abc
+0:a.c:axc
+0:a.*c:axyzc
+1:a.*c:axyzd
+1:a[bc]d:abc
+0:a[bc]d:abd
+1:a[b-d]e:abd
+0:a[b-d]e:ace
+0:a[b-d]:aac
+0:a[-b]:a-
+0:a[b-]:a-
+1:a[b-a]:-
+2:a[]b:-
+2:a[:-
+0:a]:a]
+0:a[]]b:a]b
+0:a[^bc]d:aed
+1:a[^bc]d:abd
+0:a[^-b]c:adc
+1:a[^-b]c:a-c
+1:a[^]b]c:a]c
+0:a[^]b]c:adc
+0:ab|cd:abc
+0:ab|cd:abcd
+0:()ef:def
+0:()*:-
+1:*a:-
+0:^*:-
+0:$*:-
+1:(*)b:-
+1:$b:b
+2:a\:-
+0:a\(b:a(b
+0:a\(*b:ab
+0:a\(*b:a((b
+1:a\x:a\x
+1:abc):-
+2:(abc:-
+0:((a)):abc
+0:(a)b(c):abc
+0:a+b+c:aabbabc
+0:a**:-
+0:a*?:-
+0:(a*)*:-
+0:(a*)+:-
+0:(a|)*:-
+0:(a*|b)*:-
+0:(a+|b)*:ab
+0:(a+|b)+:ab
+0:(a+|b)?:ab
+0:[^ab]*:cde
+0:(^)*:-
+0:(ab|)*:-
+2:)(:-
+1:abc:
+1:abc:
+0:a*:
+0:([abc])*d:abbbcd
+0:([abc])*bcd:abcd
+0:a|b|c|d|e:e
+0:(a|b|c|d|e)f:ef
+0:((a*|b))*:-
+0:abcd*efg:abcdefg
+0:ab*:xabyabbbz
+0:ab*:xayabbbz
+0:(ab|cd)e:abcde
+0:[abhgefdc]ij:hij
+1:^(ab|cd)e:abcde
+0:(abc|)ef:abcdef
+0:(a|b)c*d:abcd
+0:(ab|ab*)bc:abc
+0:a([bc]*)c*:abc
+0:a([bc]*)(c*d):abcd
+0:a([bc]+)(c*d):abcd
+0:a([bc]*)(c+d):abcd
+0:a[bcd]*dcdcde:adcdcde
+1:a[bcd]+dcdcde:adcdcde
+0:(ab|a)b*c:abc
+0:((a)(b)c)(d):abcd
+0:[A-Za-z_][A-Za-z0-9_]*:alpha
+0:^a(bc+|b[eh])g|.h$:abh
+0:(bc+d$|ef*g.|h?i(j|k)):effgz
+0:(bc+d$|ef*g.|h?i(j|k)):ij
+1:(bc+d$|ef*g.|h?i(j|k)):effg
+1:(bc+d$|ef*g.|h?i(j|k)):bcdd
+0:(bc+d$|ef*g.|h?i(j|k)):reffgz
+1:((((((((((a)))))))))):-
+0:(((((((((a))))))))):a
+1:multiple words of text:uh-uh
+0:multiple words:multiple words, yeah
+0:(.*)c(.*):abcde
+1:\((.*),:(.*)\)
+1:[k]:ab
+0:abcd:abcd
+0:a(bc)d:abcd
+0:a[-]?c:ac
+0:(....).*\1:beriberi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Qaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mo'ammar Gadhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Kaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Qadhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moammar El Kadhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Gadafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar al-Qadafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moamer El Kazzafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moamar al-Gaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar Al Qathafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Al Qathafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mo'ammar el-Gadhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moamar El Kadhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar al-Qadhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar al-Qadhdhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar Qadafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moamar Gaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar Qadhdhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Khaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar al-Khaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'amar al-Kadafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Ghaddafy
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Ghadafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Ghaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muamar Kaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Quathafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Gheddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muamar Al-Kaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moammar Khadafy
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moammar Qudhafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar al-Qaddafi
+0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi
diff --git a/posix/TESTS2C.sed b/posix/TESTS2C.sed
new file mode 100644
index 0000000..28dd131
--- /dev/null
+++ b/posix/TESTS2C.sed
@@ -0,0 +1,2 @@
+s/\\/\\\\/g
+s/\(.*\):\(.*\):\(.*\)/ {\1, "\2", "\3"},/
diff --git a/posix/regex.c b/posix/regex.c
index 370a612..202ee19 100644
--- a/posix/regex.c
+++ b/posix/regex.c
@@ -35,8 +35,12 @@
#include <config.h>
#endif
+#if defined(STDC_HEADERS) && !defined(emacs)
+#include <stddef.h>
+#else
/* We need this for `regex.h', and perhaps for the Emacs include files. */
#include <sys/types.h>
+#endif
/* This is for other GNU distributions with internationalized messages. */
#if HAVE_LIBINTL_H || defined (_LIBC)
@@ -500,6 +504,7 @@ typedef enum
} while (0)
#ifdef DEBUG
+static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
static void
extract_number (dest, source)
int *dest;
@@ -527,6 +532,8 @@ extract_number (dest, source)
} while (0)
#ifdef DEBUG
+static void extract_number_and_incr _RE_ARGS ((int *destination,
+ unsigned char **source));
static void
extract_number_and_incr (destination, source)
int *destination;
@@ -890,6 +897,13 @@ print_double_string (where, string1, size1, string2, size2)
}
}
+void
+printchar (c)
+ int c;
+{
+ putc (c, stderr);
+}
+
#else /* not DEBUG */
#undef assert
@@ -1007,9 +1021,35 @@ static const char *re_error_msgid[] =
#endif
/* Roughly the maximum number of failure points on the stack. Would be
- exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+ exactly that if always used MAX_FAILURE_SPACE each time we failed.
This is a variable only so users of regex can assign to it; we never
change it ourselves. */
+
+#ifdef INT_IS_16BIT
+
+#if defined (MATCH_MAY_ALLOCATE)
+long re_max_failures = 4000;
+#else
+long re_max_failures = 2000;
+#endif
+
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ long integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned long size;
+ unsigned long avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#else /* not INT_IS_16BIT */
+
#if defined (MATCH_MAY_ALLOCATE)
/* 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. */
@@ -1033,6 +1073,8 @@ typedef struct
unsigned avail; /* Offset of next open position. */
} fail_stack_type;
+#endif /* INT_IS_16BIT */
+
#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
@@ -1123,7 +1165,7 @@ typedef struct
/* Used to omit pushing failure point id's when we're not debugging. */
#ifdef DEBUG
#define DEBUG_PUSH PUSH_FAILURE_INT
-#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
+#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
#else
#define DEBUG_PUSH(item)
#define DEBUG_POP(item_addr)
@@ -1144,7 +1186,10 @@ typedef struct
char *destination; \
/* Must be int, so when we don't save any registers, the arithmetic \
of 0 + -1 isn't done as unsigned. */ \
- unsigned this_reg; \
+ /* Can't be int, since there is not a shred of a guarantee that int \
+ is wide enough to hold a value of something to which pointer can \
+ be assigned */ \
+ s_reg_t this_reg; \
\
DEBUG_STATEMENT (failure_id++); \
DEBUG_STATEMENT (nfailure_points_pushed++); \
@@ -1257,7 +1302,7 @@ typedef struct
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{ \
DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
- unsigned this_reg; \
+ s_reg_t this_reg; \
const unsigned char *string_temp; \
\
assert (!FAIL_STACK_EMPTY ()); \
@@ -1288,10 +1333,10 @@ typedef struct
DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
\
/* Restore register info. */ \
- high_reg = (unsigned) POP_FAILURE_INT (); \
+ high_reg = (active_reg_t) POP_FAILURE_INT (); \
DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
\
- low_reg = (unsigned) POP_FAILURE_INT (); \
+ low_reg = (active_reg_t) POP_FAILURE_INT (); \
DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
\
if (1) \
@@ -1336,6 +1381,9 @@ typedef struct
the type of `word', i.e., is something that fits into one item on the
failure stack. */
+
+/* Declarations and macros for re_match_2. */
+
typedef union
{
fail_stack_elt_t word;
@@ -1365,7 +1413,7 @@ typedef union
{ \
if (!set_regs_matched_done) \
{ \
- unsigned r; \
+ active_reg_t r; \
set_regs_matched_done = 1; \
for (r = lowest_active_reg; r <= highest_active_reg; r++) \
{ \
@@ -1384,11 +1432,25 @@ static char reg_unset_dummy;
/* Subroutine declarations and macros for regex_compile. */
-static void store_op1 (), store_op2 ();
-static void insert_op1 (), insert_op2 ();
-static boolean at_begline_loc_p (), at_endline_loc_p ();
-static boolean group_in_compile_stack ();
-static reg_errcode_t compile_range ();
+static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp));
+static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
+static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg1, int arg2));
+static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg, unsigned char *end));
+static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg1, int arg2, unsigned char *end));
+static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
+ reg_syntax_t syntax));
+static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
+ reg_syntax_t syntax));
+static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
+ const char *pend,
+ char *translate,
+ reg_syntax_t syntax,
+ unsigned char *b));
/* Fetch the next character in the uncompiled pattern---translating it
if necessary. Also cast from a signed character in the constant
@@ -1463,26 +1525,39 @@ static reg_errcode_t compile_range ();
/* Store a jump with opcode OP at LOC to location TO. We store a
relative address offset by the three bytes the jump itself occupies. */
#define STORE_JUMP(op, loc, to) \
- store_op1 (op, loc, (to) - (loc) - 3)
+ store_op1 (op, loc, (int) ((to) - (loc) - 3))
/* Likewise, for a two-argument jump. */
#define STORE_JUMP2(op, loc, to, arg) \
- store_op2 (op, loc, (to) - (loc) - 3, arg)
+ store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
#define INSERT_JUMP(op, loc, to) \
- insert_op1 (op, loc, (to) - (loc) - 3, b)
+ insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
#define INSERT_JUMP2(op, loc, to, arg) \
- insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+ insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
/* This is not an arbitrary limit: the arguments which represent offsets
into the pattern are two bytes long. So if 2^16 bytes turns out to
be too small, many things would have to change. */
+/* Any other compiler which, like MSC, has allocation limit below 2^16
+ bytes will have to use approach similar to what was done below for
+ MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
+ reallocating to 0 bytes. Such thing is not going to work too well.
+ You have been warned!! */
+#ifdef _MSC_VER
+/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
+ The REALLOC define eliminates a flurry of conversion warnings,
+ but is not required. */
+#define MAX_BUF_SIZE 65500L
+#define REALLOC(p,s) realloc ((p), (size_t) (s))
+#else
#define MAX_BUF_SIZE (1L << 16)
-
+#define REALLOC(p,s) realloc ((p), (s))
+#endif
/* Extend the buffer by twice its current size via realloc and
reset the pointers that pointed into the old block to point to the
@@ -1496,7 +1571,7 @@ static reg_errcode_t compile_range ();
bufp->allocated <<= 1; \
if (bufp->allocated > MAX_BUF_SIZE) \
bufp->allocated = MAX_BUF_SIZE; \
- bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+ bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
if (bufp->buffer == NULL) \
return REG_ESPACE; \
/* If the buffer moved, move all the pointers into it. */ \
@@ -1528,7 +1603,8 @@ typedef unsigned regnum_t;
/* Since offsets can go either forwards or backwards, this type needs to
be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
-typedef int pattern_offset_t;
+/* int may be not enough when sizeof(int) == 2. */
+typedef long pattern_offset_t;
typedef struct
{
@@ -1638,6 +1714,10 @@ regex_grow_registers (num_regs)
#endif /* not MATCH_MAY_ALLOCATE */
+static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
+ compile_stack,
+ regnum_t regnum));
+
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
Returns one of error codes defined in `regex.h', or zero for success.
@@ -1663,7 +1743,7 @@ regex_grow_registers (num_regs)
static reg_errcode_t
regex_compile (pattern, size, syntax, bufp)
const char *pattern;
- int size;
+ size_t size;
reg_syntax_t syntax;
struct re_pattern_buffer *bufp;
{
@@ -2104,6 +2184,9 @@ regex_compile (pattern, size, syntax, bufp)
|| (is_upper && ISUPPER (ch))
|| (is_xdigit && ISXDIGIT (ch)))
SET_LIST_BIT (ch);
+ if ( translate && (is_upper || is_lower)
+ && (ISUPPER (ch) || ISLOWER (ch)))
+ SET_LIST_BIT (ch);
}
had_char_class = true;
}
@@ -2529,38 +2612,54 @@ regex_compile (pattern, size, syntax, bufp)
case 'w':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
laststart = b;
BUF_PUSH (wordchar);
break;
case 'W':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
laststart = b;
BUF_PUSH (notwordchar);
break;
case '<':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
BUF_PUSH (wordbeg);
break;
case '>':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
BUF_PUSH (wordend);
break;
case 'b':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
BUF_PUSH (wordbound);
break;
case 'B':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
BUF_PUSH (notwordbound);
break;
case '`':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
BUF_PUSH (begbuf);
break;
case '\'':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
BUF_PUSH (endbuf);
break;
@@ -2575,7 +2674,7 @@ regex_compile (pattern, size, syntax, bufp)
FREE_STACK_RETURN (REG_ESUBREG);
/* Can't back reference to a subexpression if inside of it. */
- if (group_in_compile_stack (compile_stack, c1))
+ if (group_in_compile_stack (compile_stack, (regnum_t) c1))
goto normal_char;
laststart = b;
@@ -2802,7 +2901,7 @@ at_begline_loc_p (pattern, p, syntax)
static boolean
at_endline_loc_p (p, pend, syntax)
const char *p, *pend;
- int syntax;
+ reg_syntax_t syntax;
{
const char *next = p;
boolean next_backslash = *next == '\\';
@@ -2924,13 +3023,14 @@ re_compile_fastmap (bufp)
register char *fastmap = bufp->fastmap;
unsigned char *pattern = bufp->buffer;
- unsigned long size = bufp->used;
unsigned char *p = pattern;
- register unsigned char *pend = pattern + size;
+ register unsigned char *pend = pattern + bufp->used;
+#ifdef REL_ALLOC
/* This holds the pointer to the failure stack, when
it is allocated relocatably. */
fail_stack_elt_t *failure_stack_ptr;
+#endif
/* Assume that each path through the pattern can be null until
proven otherwise. We set this false at the bottom of switch
@@ -3406,13 +3506,6 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
return -1;
} /* re_search_2 */
-/* Declarations and macros for re_match_2. */
-
-static int bcmp_translate ();
-static boolean alt_match_null_string_p (),
- common_op_match_null_string_p (),
- group_match_null_string_p ();
-
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
#define POINTER_TO_OFFSET(ptr) \
@@ -3453,14 +3546,12 @@ static boolean alt_match_null_string_p (),
: (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
== Sword)
-/* Disabled due to a compiler bug -- see comment at case wordbound */
-#if 0
/* Test if the character before D and the one at D differ with respect
to being word-constituent. */
#define AT_WORD_BOUNDARY(d) \
(AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
|| WORDCHAR_P (d - 1) != WORDCHAR_P (d))
-#endif
+
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
@@ -3506,11 +3597,26 @@ re_match (bufp, string, size, pos, regs)
{
int result = re_match_2_internal (bufp, NULL, 0, string, size,
pos, regs, size);
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
alloca (0);
+#endif
+#endif
return result;
}
#endif /* not emacs */
+static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
+ int len, char *translate));
/* re_match_2 matches the compiled pattern in BUFP against the
the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
@@ -3536,7 +3642,11 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
{
int result = re_match_2_internal (bufp, string1, size1, string2, size2,
pos, regs, stop);
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
alloca (0);
+#endif
+#endif
return result;
}
@@ -3593,18 +3703,20 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
+#ifdef REL_ALLOC
/* This holds the pointer to the failure stack, when
it is allocated relocatably. */
fail_stack_elt_t *failure_stack_ptr;
+#endif
/* We fill all the registers internally, independent of what we
return, for use in backreferences. The number here includes
an element for register zero. */
- unsigned num_regs = bufp->re_nsub + 1;
+ size_t num_regs = bufp->re_nsub + 1;
/* The currently active registers. */
- unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
- unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
/* Information on the contents of registers. These are pointers into
the input strings; they record just what was matched (on this
@@ -4511,10 +4623,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
= *p2 == (unsigned char) endline ? '\n' : p2[2];
#endif
+#if 0
if ((re_opcode_t) p1[3] == exactn
&& ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
&& (p2[2 + p1[5] / BYTEWIDTH]
& (1 << (p1[5] % BYTEWIDTH)))))
+#else
+ if ((re_opcode_t) p1[3] == exactn
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
+ && (p2[2 + p1[4] / BYTEWIDTH]
+ & (1 << (p1[4] % BYTEWIDTH)))))
+#endif
{
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
@@ -4580,7 +4699,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
actual values. Otherwise, we will restore only one
register from the stack, since lowest will == highest in
`pop_failure_point'. */
- unsigned dummy_low_reg, dummy_high_reg;
+ active_reg_t dummy_low_reg, dummy_high_reg;
unsigned char *pdummy;
const char *sdummy;
@@ -5133,11 +5252,12 @@ common_op_match_null_string_p (p, end, reg_info)
static int
bcmp_translate (s1, s2, len, translate)
- unsigned char *s1, *s2;
+ const char *s1, *s2;
register int len;
RE_TRANSLATE_TYPE translate;
{
- register unsigned char *p1 = s1, *p2 = s2;
+ register const unsigned char *p1 = (const unsigned char *) s1;
+ register const unsigned char *p2 = (const unsigned char *) s2;
while (len)
{
if (translate[*p1++] != translate[*p2++]) return 1;
@@ -5160,7 +5280,7 @@ bcmp_translate (s1, s2, len, translate)
const char *
re_compile_pattern (pattern, length, bufp)
const char *pattern;
- int length;
+ size_t length;
struct re_pattern_buffer *bufp;
{
reg_errcode_t ret;
@@ -5289,7 +5409,7 @@ regcomp (preg, pattern, cflags)
int cflags;
{
reg_errcode_t ret;
- unsigned syntax
+ reg_syntax_t syntax
= (cflags & REG_EXTENDED) ?
RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
diff --git a/posix/regex.h b/posix/regex.h
index 80f9c6d..ae0165e 100644
--- a/posix/regex.h
+++ b/posix/regex.h
@@ -37,16 +37,23 @@ extern "C" {
#include <stddef.h>
#endif
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
/* The following bits are used to determine the regexp syntax we
recognize. The set/not-set meanings are chosen so that Emacs syntax
remains the value 0. The bits are given in alphabetical order, and
the definitions shifted by one from the previous bit; thus, when we
add or remove a bit, only one other definition need change. */
-typedef unsigned reg_syntax_t;
+typedef unsigned long int reg_syntax_t;
/* If this bit is not set, then \ inside a bracket expression is literal.
If set, then such a \ quotes the following character. */
-#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
/* If this bit is not set, then + and ? are operators, and \+ and \? are
literals.
@@ -141,6 +148,10 @@ typedef unsigned reg_syntax_t;
without further backtracking. */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
@@ -154,13 +165,18 @@ extern reg_syntax_t re_syntax_options;
#define RE_SYNTAX_EMACS 0
#define RE_SYNTAX_AWK \
- (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS))
#define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_NO_GNU_OPS)
#define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
@@ -216,7 +232,8 @@ extern reg_syntax_t re_syntax_options;
#ifdef RE_DUP_MAX
#undef RE_DUP_MAX
#endif
-#define RE_DUP_MAX ((1 << 15) - 1)
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+#define RE_DUP_MAX (0x7fff)
/* POSIX `cflags' bits (i.e., information for `regcomp'). */
@@ -417,7 +434,7 @@ extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
and syntax given by the global `re_syntax_options', into the buffer
BUFFER. Return NULL if successful, and an error string if not. */
extern const char *re_compile_pattern
- _RE_ARGS ((const char *pattern, int length,
+ _RE_ARGS ((const char *pattern, size_t length,
struct re_pattern_buffer *buffer));
@@ -476,10 +493,12 @@ extern void re_set_registers
unsigned num_regs, regoff_t *starts, regoff_t *ends));
#ifdef _REGEX_RE_COMP
+#ifndef _CRAY
/* 4.2 bsd compatibility. */
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
#endif
+#endif
/* POSIX compatibility. */
extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
diff --git a/posix/runtests.c b/posix/runtests.c
new file mode 100644
index 0000000..b6a292e
--- /dev/null
+++ b/posix/runtests.c
@@ -0,0 +1,132 @@
+/***********************************************************
+
+Copyright 1995 by Tom Lord
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of the copyright holder not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+Tom Lord DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+EVENT SHALL TOM LORD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+
+******************************************************************/
+
+
+
+#include <sys/types.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+
+
+struct a_test
+{
+ int expected;
+ const char * pattern;
+ const unsigned char * data;
+};
+
+static const struct a_test the_tests[] =
+{
+#include "testcases.h"
+ {-1, 0, 0}
+};
+
+
+
+
+int
+run_a_test (int id, const struct a_test * t)
+{
+ static const char * last_pattern = 0;
+ static regex_t r;
+ int err;
+ char errmsg[100];
+ int x;
+ regmatch_t regs[10];
+
+ if (!last_pattern || strcmp (last_pattern, t->pattern))
+ {
+ if (last_pattern)
+ regfree (&r);
+ last_pattern = t->pattern;
+ err = regcomp (&r, t->pattern, REG_EXTENDED);
+ if (err)
+ {
+ if (t->expected)
+ {
+ puts (" OK.");
+ return 0;
+ }
+ regerror (err, &r, errmsg, 100);
+ printf ("test %d\n", id);
+ puts (errmsg);
+ return 1;
+ }
+ }
+
+ err = regexec (&r, t->data, 10, regs, 0);
+
+ if (err != t->expected)
+ {
+ printf ("test %d\n", id);
+ printf ("pattern \"%s\" data \"%s\" wanted %d got %d\n",
+ t->pattern, t->data, t->expected, err);
+ for (x = 0; x < 10; ++x)
+ printf ("reg %d == (%d, %d) %.*s\n",
+ x,
+ regs[x].rm_so,
+ regs[x].rm_eo,
+ regs[x].rm_eo - regs[x].rm_so,
+ t->data + regs[x].rm_so);
+ return 1;
+ }
+ puts (" OK.");
+ return 0;
+}
+
+
+
+int
+main (int argc, char * argv[])
+{
+ int x;
+ int lo;
+ int hi;
+ int res = 0;
+
+ lo = 0;
+ hi = (sizeof (the_tests) / sizeof (the_tests[0])) - 1;
+
+ if (argc > 1)
+ {
+ lo = atoi (argv[1]);
+ hi = lo + 1;
+
+ if (argc > 2)
+ hi = atoi (argv[2]);
+ }
+
+ for (x = lo; x < hi; ++x)
+ {
+ printf ("#%d:", x);
+ res |= run_a_test (x, &the_tests[x]);
+ }
+ {
+ exit (0);
+ }
+}
+
+
diff --git a/posix/testcases.h b/posix/testcases.h
new file mode 100644
index 0000000..c471ca0
--- /dev/null
+++ b/posix/testcases.h
@@ -0,0 +1,159 @@
+ {0, "(.*)*\\1", "xx"},
+ {0, "^", ""},
+ {0, "$", ""},
+ {0, "^$", ""},
+ {0, "^a$", "a"},
+ {0, "abc", "abc"},
+ {1, "abc", "xbc"},
+ {1, "abc", "axc"},
+ {1, "abc", "abx"},
+ {0, "abc", "xabcy"},
+ {0, "abc", "ababc"},
+ {0, "ab*c", "abc"},
+ {0, "ab*bc", "abc"},
+ {0, "ab*bc", "abbc"},
+ {0, "ab*bc", "abbbbc"},
+ {0, "ab+bc", "abbc"},
+ {1, "ab+bc", "abc"},
+ {1, "ab+bc", "abq"},
+ {0, "ab+bc", "abbbbc"},
+ {0, "ab?bc", "abbc"},
+ {0, "ab?bc", "abc"},
+ {1, "ab?bc", "abbbbc"},
+ {0, "ab?c", "abc"},
+ {0, "^abc$", "abc"},
+ {1, "^abc$", "abcc"},
+ {0, "^abc", "abcc"},
+ {1, "^abc$", "aabc"},
+ {0, "abc$", "aabc"},
+ {0, "^", "abc"},
+ {0, "$", "abc"},
+ {0, "a.c", "abc"},
+ {0, "a.c", "axc"},
+ {0, "a.*c", "axyzc"},
+ {1, "a.*c", "axyzd"},
+ {1, "a[bc]d", "abc"},
+ {0, "a[bc]d", "abd"},
+ {1, "a[b-d]e", "abd"},
+ {0, "a[b-d]e", "ace"},
+ {0, "a[b-d]", "aac"},
+ {0, "a[-b]", "a-"},
+ {0, "a[b-]", "a-"},
+ {1, "a[b-a]", "-"},
+ {2, "a[]b", "-"},
+ {2, "a[", "-"},
+ {0, "a]", "a]"},
+ {0, "a[]]b", "a]b"},
+ {0, "a[^bc]d", "aed"},
+ {1, "a[^bc]d", "abd"},
+ {0, "a[^-b]c", "adc"},
+ {1, "a[^-b]c", "a-c"},
+ {1, "a[^]b]c", "a]c"},
+ {0, "a[^]b]c", "adc"},
+ {0, "ab|cd", "abc"},
+ {0, "ab|cd", "abcd"},
+ {0, "()ef", "def"},
+ {0, "()*", "-"},
+ {1, "*a", "-"},
+ {0, "^*", "-"},
+ {0, "$*", "-"},
+ {1, "(*)b", "-"},
+ {1, "$b", "b"},
+ {2, "a\\", "-"},
+ {0, "a\\(b", "a(b"},
+ {0, "a\\(*b", "ab"},
+ {0, "a\\(*b", "a((b"},
+ {1, "a\\x", "a\\x"},
+ {1, "abc)", "-"},
+ {2, "(abc", "-"},
+ {0, "((a))", "abc"},
+ {0, "(a)b(c)", "abc"},
+ {0, "a+b+c", "aabbabc"},
+ {0, "a**", "-"},
+ {0, "a*?", "-"},
+ {0, "(a*)*", "-"},
+ {0, "(a*)+", "-"},
+ {0, "(a|)*", "-"},
+ {0, "(a*|b)*", "-"},
+ {0, "(a+|b)*", "ab"},
+ {0, "(a+|b)+", "ab"},
+ {0, "(a+|b)?", "ab"},
+ {0, "[^ab]*", "cde"},
+ {0, "(^)*", "-"},
+ {0, "(ab|)*", "-"},
+ {2, ")(", "-"},
+ {1, "abc", ""},
+ {1, "abc", ""},
+ {0, "a*", ""},
+ {0, "([abc])*d", "abbbcd"},
+ {0, "([abc])*bcd", "abcd"},
+ {0, "a|b|c|d|e", "e"},
+ {0, "(a|b|c|d|e)f", "ef"},
+ {0, "((a*|b))*", "-"},
+ {0, "abcd*efg", "abcdefg"},
+ {0, "ab*", "xabyabbbz"},
+ {0, "ab*", "xayabbbz"},
+ {0, "(ab|cd)e", "abcde"},
+ {0, "[abhgefdc]ij", "hij"},
+ {1, "^(ab|cd)e", "abcde"},
+ {0, "(abc|)ef", "abcdef"},
+ {0, "(a|b)c*d", "abcd"},
+ {0, "(ab|ab*)bc", "abc"},
+ {0, "a([bc]*)c*", "abc"},
+ {0, "a([bc]*)(c*d)", "abcd"},
+ {0, "a([bc]+)(c*d)", "abcd"},
+ {0, "a([bc]*)(c+d)", "abcd"},
+ {0, "a[bcd]*dcdcde", "adcdcde"},
+ {1, "a[bcd]+dcdcde", "adcdcde"},
+ {0, "(ab|a)b*c", "abc"},
+ {0, "((a)(b)c)(d)", "abcd"},
+ {0, "[A-Za-z_][A-Za-z0-9_]*", "alpha"},
+ {0, "^a(bc+|b[eh])g|.h$", "abh"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "effgz"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "ij"},
+ {1, "(bc+d$|ef*g.|h?i(j|k))", "effg"},
+ {1, "(bc+d$|ef*g.|h?i(j|k))", "bcdd"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "reffgz"},
+ {1, "((((((((((a))))))))))", "-"},
+ {0, "(((((((((a)))))))))", "a"},
+ {1, "multiple words of text", "uh-uh"},
+ {0, "multiple words", "multiple words, yeah"},
+ {0, "(.*)c(.*)", "abcde"},
+ {1, "\\((.*),", "(.*)\\)"},
+ {1, "[k]", "ab"},
+ {0, "abcd", "abcd"},
+ {0, "a(bc)d", "abcd"},
+ {0, "a[-]?c", "ac"},
+ {0, "(....).*\\1", "beriberi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Qaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mo'ammar Gadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Qadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar El Kadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Gadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamer El Kazzafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar al-Gaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Al Qathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Al Qathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mo'ammar el-Gadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar El Kadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar al-Qadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qadhdhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Qadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar Gaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Qadhdhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Khaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar al-Khaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'amar al-Kadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghaddafy"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muamar Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Quathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Gheddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muamar Al-Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar Khadafy "},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar Qudhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi"},