aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Bennett <steveb@workware.net.au>2025-08-09 20:41:45 +1000
committerSteve Bennett <steveb@workware.net.au>2025-08-13 08:05:25 +1000
commit00b27e1bcf275bfebe62ff882ef17f3401bdbea0 (patch)
tree1d9d1041c5fd38713d7cb8b116b0f3858a23563e
parent3c89e1bffa6fa0d7e66657a0ab7a29a1066dc536 (diff)
downloadjimtcl-00b27e1bcf275bfebe62ff882ef17f3401bdbea0.zip
jimtcl-00b27e1bcf275bfebe62ff882ef17f3401bdbea0.tar.gz
jimtcl-00b27e1bcf275bfebe62ff882ef17f3401bdbea0.tar.bz2
regexp, regsub: add support for -lineanchor and -linestop
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r--jim-regexp.c53
-rw-r--r--jimregexp.c6
-rw-r--r--jimregexp.h5
-rw-r--r--tests/regexp.test74
-rw-r--r--tests/regexp2.test36
5 files changed, 97 insertions, 77 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index cf06e68..c1909c0 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -122,6 +122,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int opt_indices = 0;
int opt_all = 0;
int opt_inline = 0;
+ int opt_lineanchor = 0;
regex_t *regex;
int match, i, j;
int offset = 0;
@@ -137,10 +138,10 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int eflags = 0;
int option;
enum {
- OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
+ OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_LINESTOP, OPT_LINEANCHOR, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
- "-indices", "-nocase", "-line", "-all", "-inline", "-start", "-expanded", "--", NULL
+ "-indices", "-nocase", "-line", "-linestop", "-lineanchor", "-all", "-inline", "-start", "-expanded", "--", NULL
};
for (i = 1; i < argc; i++) {
@@ -167,8 +168,20 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
case OPT_LINE:
regcomp_flags |= REG_NEWLINE;
+ opt_lineanchor = 1;
break;
+#ifdef REG_NEWLINE_STOP
+ case OPT_LINESTOP:
+ regcomp_flags |= REG_NEWLINE_STOP;
+ break;
+#endif
+#ifdef REG_NEWLINE_ANCHOR
+ case OPT_LINEANCHOR:
+ regcomp_flags |= REG_NEWLINE_ANCHOR;
+ opt_lineanchor = 1;
+ break;
+#endif
case OPT_ALL:
opt_all = 1;
break;
@@ -186,14 +199,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
}
break;
- case OPT_EXPANDED:
#ifdef REG_EXPANDED
+ case OPT_EXPANDED:
regcomp_flags |= REG_EXPANDED;
break;
-#else
+#endif
+ default:
+ /* Could get here if -linestop or -lineanchor or -expanded is not supported */
Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
return JIM_ERR;
-#endif
}
}
if (argc - i < 2) {
@@ -313,7 +327,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
}
}
- if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) {
+ if (opt_all && (pattern[0] != '^' || opt_lineanchor) && *source_str) {
if (pmatch[0].rm_eo) {
offset += utf8_strlen(source_str, pmatch[0].rm_eo);
source_str += pmatch[0].rm_eo;
@@ -369,10 +383,10 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
const char *pattern;
int option;
enum {
- OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
+ OPT_NOCASE, OPT_LINE, OPT_LINESTOP, OPT_LINEANCHOR, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
- "-nocase", "-line", "-all", "-start", "-command", "-expanded", "--", NULL
+ "-nocase", "-line", "-linestop", "-lineanchor", "-all", "-start", "-command", "-expanded", "--", NULL
};
for (i = 1; i < argc; i++) {
@@ -397,6 +411,16 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
regcomp_flags |= REG_NEWLINE;
break;
+#ifdef REG_NEWLINE_STOP
+ case OPT_LINESTOP:
+ regcomp_flags |= REG_NEWLINE_STOP;
+ break;
+#endif
+#ifdef REG_NEWLINE_ANCHOR
+ case OPT_LINEANCHOR:
+ regcomp_flags |= REG_NEWLINE_ANCHOR;
+ break;
+#endif
case OPT_ALL:
opt_all = 1;
break;
@@ -414,14 +438,16 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
opt_command = 1;
break;
- case OPT_EXPANDED:
#ifdef REG_EXPANDED
+ case OPT_EXPANDED:
regcomp_flags |= REG_EXPANDED;
break;
-#else
+#endif
+
+ default:
+ /* Could get here if -linestop or -lineanchor or -expanded is not supported */
Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
return JIM_ERR;
-#endif
}
}
if (argc - i != 3 && argc - i != 4) {
@@ -583,11 +609,6 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
break;
}
- /* An anchored pattern without -line must be done */
- if ((regcomp_flags & REG_NEWLINE) == 0 && pattern[0] == '^') {
- break;
- }
-
regexec_flags = 0;
if (pmatch[0].rm_eo == pmatch[0].rm_so) {
/* Matched a zero length string. Need to avoid matching the same position again */
diff --git a/jimregexp.c b/jimregexp.c
index ca755c2..86f99ce 100644
--- a/jimregexp.c
+++ b/jimregexp.c
@@ -1234,7 +1234,7 @@ int jim_regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t
}
if (*string) {
nextline:
- if (preg->cflags & REG_NEWLINE) {
+ if (preg->cflags & REG_NEWLINE_ANCHOR) {
/* Try the next anchor? */
string = strchr(string, '\n');
if (string) {
@@ -1369,12 +1369,12 @@ static const char *str_find(const char *string, int c, int nocase)
/**
* Returns true if 'ch' is an end-of-line char.
*
- * In REG_NEWLINE mode, \n is considered EOL in
+ * In REG_NEWLINE_STOP mode, \n is considered EOL in
* addition to \0
*/
static int reg_iseol(regex_t *preg, int ch)
{
- if (preg->cflags & REG_NEWLINE) {
+ if (preg->cflags & REG_NEWLINE_STOP) {
return ch == '\0' || ch == '\n';
}
else {
diff --git a/jimregexp.h b/jimregexp.h
index 86a94f6..b6058c7 100644
--- a/jimregexp.h
+++ b/jimregexp.h
@@ -70,8 +70,11 @@ typedef struct regexp {
typedef regexp regex_t;
#define REG_EXTENDED 0
-#define REG_NEWLINE 1
#define REG_ICASE 2
+#define REG_NEWLINE_ANCHOR 4
+#define REG_NEWLINE_STOP 8
+/* REG_NEWLINE is POSIX */
+#define REG_NEWLINE (REG_NEWLINE_ANCHOR | REG_NEWLINE_STOP)
#define REG_NOTBOL 16
#define REG_EXPANDED 32
diff --git a/tests/regexp.test b/tests/regexp.test
index 681b793..b911014 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -37,12 +37,12 @@ test regexp-1.5 {basic regexp operation} {
test regexp-1.6 {basic regexp operation} regexp_are {
list [catch {regexp {} abc} msg] $msg
} {0 1}
-#test regexp-1.7 {regexp utf compliance} {
-# # if not UTF-8 aware, result is "0 1"
-# set foo "\u4e4eb q"
-# regexp "\u4e4eb q" "a\u4e4eb qw\u5e4e\x4e wq" bar
-# list [string compare $foo $bar] [regexp 4 $bar]
-#} {0 0}
+test regexp-1.7 {regexp utf compliance} {
+ # if not UTF-8 aware, result is "0 1"
+ set foo "\u4e4eb q"
+ regexp "\u4e4eb q" "a\u4e4eb qw\u5e4e\x4e wq" bar
+ list [string compare $foo $bar] [regexp 4 $bar]
+} {0 0}
test regexp-2.1 {getting substrings back from regexp} {
@@ -201,9 +201,9 @@ test regexp-6.2 {regexp errors} -body {
list [catch {regexp -nocase a} msg] $msg
} -result {1 {wrong # args: should be "regexp ?-option ...? exp string ?matchVar? ?subMatchVar ...?"}}
-test regexp-6.3 {regexp errors} -constraints jim -body {
+test regexp-6.3 {regexp errors} -body {
list [catch {regexp -gorp a} msg] $msg
-} -result {1 {bad option "-gorp": must be -all, -expanded, -indices, -inline, -line, -nocase, -start, or --}}
+} -result {1 {bad option "-gorp": must be -all, -expanded, -indices, -inline, -line, -lineanchor, -linestop, -nocase, -start, or --}}
test regexp-6.4 {regexp errors} {
catch {regexp a( b} msg
@@ -283,12 +283,12 @@ test regexp-7.16 {basic regsub operation} {
set foo xxx
list [regsub x "" y foo] $foo
} {0 {}}
-#test regexp-7.17 {regsub utf compliance} {
-# # if not UTF-8 aware, result is "0 1"
-# set foo "xyz555ijka\u4e4ebpqr"
-# regsub a\u4e4eb xyza\u4e4ebijka\u4e4ebpqr 555 bar
-# list [string compare $foo $bar] [regexp 4 $bar]
-#} {0 0}
+test regexp-7.17 {regsub utf compliance} {
+ # if not UTF-8 aware, result is "0 1"
+ set foo "xyz555ijka\u4e4ebpqr"
+ regsub a\u4e4eb xyza\u4e4ebijka\u4e4ebpqr 555 bar
+ list [string compare $foo $bar] [regexp 4 $bar]
+} {0 0}
test regexp-8.1 {case conversion in regsub} {
list [regsub -nocase a(a+) xaAAaAAay & foo] $foo
@@ -348,14 +348,14 @@ test regexp-10.3 {newline sensitivity in regsub} {
set foo xxx
list [regsub -line {^a.*b$} "dabc\naxyb\nxb" 123 foo] $foo
} "1 {dabc\n123\nxb}"
-#test regexp-10.4 {partial newline sensitivity in regsub} {
-# set foo xxx
-# list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
-#} "1 {da\n123}"
-#test regexp-10.5 {inverse partial newline sensitivity in regsub} {
-# set foo xxx
-# list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
-#} "1 {da\nb123\nxb}"
+test regexp-10.4 {partial newline sensitivity in regsub} {
+ set foo xxx
+ list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
+} "1 {da\n123}"
+test regexp-10.5 {inverse partial newline sensitivity in regsub} {
+ set foo xxx
+ list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
+} "1 {da\nb123\nxb}"
test regexp-11.1 {regsub errors} {
list [catch {regsub a b} msg] $msg
@@ -370,13 +370,9 @@ test regexp-11.4 {regsub errors} {
list [catch {regsub a b c d e f} msg] $msg
} {1 {wrong # args: should be "regsub ?-option ...? exp string subSpec ?varName?"}}
-test regexp-11.5 {regsub errors} -constraints jim -body {
- list [catch {regsub -gorp a b c} msg] $msg
-} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -nocase, -start, or --}}
-
-test regexp-11.5 {regsub errors} -constraints tcl -body {
+test regexp-11.5 {regsub errors} -body {
list [catch {regsub -gorp a b c} msg] $msg
-} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}}
+} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -lineanchor, -linestop, -nocase, -start, or --}}
test regexp-11.6 {regsub errors} {
catch {regsub -nocase a( b c d} msg
@@ -605,14 +601,14 @@ test regexp-19.1 {regsub null replacement} {
list $result [string length $result]
} "\0a\0hel\0a\0lo\0a\0 14"
-#test regexp-20.1 {regsub shared object shimmering} {
-# # Bug #461322
-# set a abcdefghijklmnopqurstuvwxyz
-# set b $a
-# set c abcdefghijklmnopqurstuvwxyz0123456789
-# regsub $a $c $b d
-# list $d [string length $d] [string bytelength $d]
-#} [list abcdefghijklmnopqurstuvwxyz0123456789 37 37]
+test regexp-20.1 {regsub shared object shimmering} {
+ # Bug #461322
+ set a abcdefghijklmnopqurstuvwxyz
+ set b $a
+ set c abcdefghijklmnopqurstuvwxyz0123456789
+ regsub $a $c $b d
+ list $d [string length $d] [string bytelength $d]
+} [list abcdefghijklmnopqurstuvwxyz0123456789 37 37]
#test regexp-20.2 {regsub shared object shimmering with -about} {
# eval regexp -about abc
#} {0 {}}
@@ -653,9 +649,9 @@ test regexp-21.9 {regexp works with empty string offset} {
regexp -start 3 -- \$ {123}
} {1}
-#test regexp-21.10 {multiple matches handle newlines} {
-# regsub -all -lineanchor -- {^#[^\n]*\n} "#one\n#two\n#three\n" foo\n
-#} "foo\nfoo\nfoo\n"
+test regexp-21.10 {multiple matches handle newlines} {
+ regsub -all -lineanchor -- {^#[^\n]*\n} "#one\n#two\n#three\n" foo\n
+} "foo\nfoo\nfoo\n"
test regexp-21.11 {multiple matches handle newlines} {
regsub -all -line -- ^ "a\nb\nc" \#
diff --git a/tests/regexp2.test b/tests/regexp2.test
index 2b8e2bc..c965cf9 100644
--- a/tests/regexp2.test
+++ b/tests/regexp2.test
@@ -481,18 +481,18 @@ test regexpComp-10.3 {newline sensitivity in regsub} {
list [regsub -line {^a.*b$} "dabc\naxyb\nxb" 123 foo] $foo
}
} "1 {dabc\n123\nxb}"
-#test regexpComp-10.4 {partial newline sensitivity in regsub} {
-# evalInProc {
-# set foo xxx
-# list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
-# }
-#} "1 {da\n123}"
-#test regexpComp-10.5 {inverse partial newline sensitivity in regsub} {
-# evalInProc {
-# set foo xxx
-# list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
-# }
-#} "1 {da\nb123\nxb}"
+test regexpComp-10.4 {partial newline sensitivity in regsub} {
+ evalInProc {
+ set foo xxx
+ list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
+ }
+} "1 {da\n123}"
+test regexpComp-10.5 {inverse partial newline sensitivity in regsub} {
+ evalInProc {
+ set foo xxx
+ list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
+ }
+} "1 {da\nb123\nxb}"
test regexpComp-10.6 {\Z only matching end of string with -line} {
evalInProc {
set foo xxx
@@ -528,7 +528,7 @@ test regexpComp-11.5 {regsub errors} -body {
evalInProc {
list [catch {regsub -gorp a b c} msg] $msg
}
-} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -nocase, -start, or --}}
+} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -lineanchor, -linestop, -nocase, -start, or --}}
test regexpComp-11.6 {regsub errors} {
evalInProc {
@@ -807,11 +807,11 @@ test regexpComp-21.10 {regexp command compiling tests} {
}
} {3 barfbarobaro}
# This useless expression fails. Jim returns "bar"
-#test regexpComp-21.11 {regexp command compiling tests} {
-# evalInProc {
-# list [regsub -all "" "" bar str] $str
-# }
-#} {0 {}}
+test regexpComp-21.11 {regexp command compiling tests} {
+ evalInProc {
+ list [regsub -all "" "" bar str] $str
+ }
+} {0 {}}
test regexpComp-21.12 {regexp empty pattern with utf8} utf8 {
# Make sure the second char isn't sliced up
evalInProc {