aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Steve Bennett <steveb@workware.net.au> 2024-11-07 08:59:39 +1000
committer Steve Bennett <steveb@workware.net.au> 2025-03-13 10:26:17 +1000
commit 849244f6b9350a42e3498f4e4aff6887e19539e5 (patch)
tree 2460eebcb8224b91ab445a3f09c96b32fa6e37d9
parent 7c1007b013cfb0ae9396e86ef5bafdd5494184c4 (diff)
download jimtcl-master-next.zip
jimtcl-master-next.tar.gz
jimtcl-master-next.tar.bz2
regexp: Handle non repeat count {...} (branch: master-next)
If it does not contain a number, treat it as an exact match.
Fixes #323
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- jimregexp.c 40
-rw-r--r-- tests/regexp2.test 20
2 files changed, 43 insertions, 17 deletions
diff --git a/jimregexp.c b/jimregexp.c
index 1fe6d8b..ca755c2 100644
--- a/jimregexp.c
+++ b/jimregexp.c
@@ -164,8 +164,7 @@
*/
#define FAIL(R,M) { (R)->err = (M); return (M); }
-#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?' || (c) == '{')
-#define META "^$.[()|?{+*"
+#define META "^$.[()|"
/*
* Flags to be passed up and down.
@@ -203,6 +202,22 @@ static void regdump(regex_t *preg);
static const char *regprop( int op );
#endif
+/* Returns 1 if s starts a repeat operator: '*', '+', '?', or '{' directly followed by a decimal digit; otherwise returns 0 */
+static int str_is_mult(const char *s)
+{
+ switch (*s) {
+ case '*':
+ case '+':
+ case '?':
+ return 1;
+ case '{':
+ if (isdigit(UCHAR(s[1]))) { /* only the single character after '{' is inspected */
+ return 1;
+ }
+ break; /* '{' without a following digit is not a repeat count: fall through to return 0 */
+ }
+ return 0;
+}
/**
* Returns the length of the null-terminated integer sequence.
@@ -494,12 +509,12 @@ static int regpiece(regex_t *preg, int *flagp)
if (ret == 0)
return 0;
- op = *preg->regparse;
- if (!ISMULT(op)) {
+ if (!str_is_mult(preg->regparse)) {
*flagp = flags;
return(ret);
}
+ op = *preg->regparse;
if (!(flags&HASWIDTH) && op != '?') {
preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
return 0;
@@ -568,7 +583,7 @@ static int regpiece(regex_t *preg, int *flagp)
}
preg->regparse++;
- if (ISMULT(*preg->regparse)) {
+ if (str_is_mult(preg->regparse)) {
preg->err = REG_ERR_NESTED_COUNT;
return 0;
}
@@ -916,12 +931,6 @@ cc_switch:
case ')':
preg->err = REG_ERR_INTERNAL;
return 0; /* Supposed to be caught earlier. */
- case '?':
- case '+':
- case '*':
- case '{':
- preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
- return 0;
case '\\':
ch = *preg->regparse++;
switch (ch) {
@@ -986,6 +995,11 @@ cc_switch:
/* Back up to pick up the first char of interest */
preg->regparse -= n;
+ if (str_is_mult(preg->regparse)) {
+ preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
+ return 0;
+ }
+
ret = regnode(preg, EXACTLY);
/* Note that a META operator such as ? or * consumes the
@@ -995,7 +1009,7 @@ cc_switch:
*/
/* Until end of string or a META char is reached */
- while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
+ while (*preg->regparse && strchr(META, *preg->regparse) == NULL && !str_is_mult(preg->regparse)) {
n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
if (ch == '\\' && preg->regparse[n]) {
/* Non-trailing backslash.
@@ -1020,7 +1034,7 @@ cc_switch:
* Check to see if the following char is a MULT
*/
- if (ISMULT(preg->regparse[n])) {
+ if (str_is_mult(&preg->regparse[n])) {
/* Yes. But do we already have some EXACTLY chars? */
if (added) {
/* Yes, so return what we have and pick up the current char next time around */
diff --git a/tests/regexp2.test b/tests/regexp2.test
index ebcf01b..4d915c2 100644
--- a/tests/regexp2.test
+++ b/tests/regexp2.test
@@ -943,19 +943,31 @@ test regexp-25.3 {End of word} {
regexp {\mcd\M} cdef
} 0
-test regexp-26.1 {regexp operator =~} {
+test regexp-25.4 {Braces not a repeat count} {
+ regexp "{abc}" "test{abc}def"
+} 1
+
+test regexp-25.5 {Repeat follows nothing} -body {
+ regexp "{3}" "test{3}def"
+} -returnCodes error -match glob -result {couldn't compile regular expression pattern: *}
+
+test regexp-25.6 {Meta char after nothing is error} -body {
+ regexp "?" "te?st"
+} -returnCodes error -match glob -result {couldn't compile regular expression pattern: *}
+
+test regexp-26.1 {regexp operator =~} jim {
expr {"abc" =~ "^a"}
} 1
-test regexp-26.2 {regexp operator =~} {
+test regexp-26.2 {regexp operator =~} jim {
expr {"abc" =~ "^b"}
} 0
-test regexp-26.2 {regexp operator =~} {
+test regexp-26.2 {regexp operator =~} jim {
expr {"abc" =~ ".b."}
} 1
-test regexp-26.3 {regexp operator =~ invalid regexp} -body {
+test regexp-26.3 {regexp operator =~ invalid regexp} -constraints jim -body {
expr {"abc" =~ {[}}
} -returnCodes error -result {couldn't compile regular expression pattern: brackets [] not balanced}