aboutsummaryrefslogtreecommitdiff
path: root/jim-regexp.c
diff options
context:
space:
mode:
author Steve Bennett <steveb@workware.net.au> 2010-10-16 21:07:06 +1000
committer Steve Bennett <steveb@workware.net.au> 2010-10-20 10:30:34 +1000
commit 9fcb8099151969bc148ecd69934ee061b77eb2bd (patch)
tree 7d988c45f87e63b5dfe105c554315f2fd3a5aad4 /jim-regexp.c
parent dad7d138ad9d14cc9af7ec2139ac860c4bdf9f27 (diff)
download jimtcl-9fcb8099151969bc148ecd69934ee061b77eb2bd.zip
jimtcl-9fcb8099151969bc148ecd69934ee061b77eb2bd.tar.gz
jimtcl-9fcb8099151969bc148ecd69934ee061b77eb2bd.tar.bz2
Fix some outlier issues with regexp/regsub
Use number of paren expressions with regexp -inline.
Fix a problem with regexp -start and an anchored search.
Fix regexp -inline -indices.
Fix regexp matching with an empty pattern (e.g. ^).
Fix some cases where regsub -all did not return all results.
Fix some regsub cases with an empty pattern.

Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'jim-regexp.c')
-rw-r--r-- jim-regexp.c 41
1 files changed, 29 insertions, 12 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index 89861c4..f41d8aa 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -127,7 +127,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
if (argc < 3) {
wrongNumArgs:
Jim_WrongNumArgs(interp, 1, argv,
- "?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? ?--? exp string ?matchVar? ?subMatchVar ...?");
+ "?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
return JIM_ERR;
}
@@ -190,8 +190,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
result = JIM_ERR;
goto done;
}
- /* XXX: Ugly! */
- num_vars = 100;
+ num_vars = regex->re_nsub + 1;
}
pmatch = Jim_Alloc((num_vars + 1) * sizeof(*pmatch));
@@ -209,6 +208,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
else if (offset > 0) {
source_str += offset;
}
+ eflags |= REG_NOTBOL;
}
if (opt_inline) {
@@ -243,7 +243,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
*/
j = 0;
- for (i += 2; opt_inline ? pmatch[j].rm_so != -1 : i < argc; i++, j++) {
+ for (i += 2; opt_inline ? j < num_vars : i < argc; i++, j++) {
Jim_Obj *resultObj;
if (opt_indices) {
@@ -290,10 +290,12 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
try_next_match:
if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) {
if (pmatch[0].rm_eo) {
+ offset += pmatch[0].rm_eo;
source_str += pmatch[0].rm_eo;
}
else {
source_str++;
+ offset++;
}
if (*source_str) {
eflags = REG_NOTBOL;
@@ -320,6 +322,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
{
int regcomp_flags = 0;
+ int regexec_flags = 0;
int opt_all = 0;
int offset = 0;
regex_t *regex;
@@ -340,7 +343,7 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
if (argc < 4) {
wrongNumArgs:
Jim_WrongNumArgs(interp, 1, argv,
- "?-nocase? ?-all? ?-line? ?-start offset? ?--? exp string subSpec ?varName?");
+ "?switches? exp string subSpec ?varName?");
return JIM_ERR;
}
@@ -359,7 +362,6 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
goto wrongNumArgs;
}
if (Jim_GetIndex(interp, argv[i], &offset) != JIM_OK) {
- printf("Failed getindex\n");
return JIM_ERR;
}
}
@@ -420,8 +422,9 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
*/
n = source_len - offset;
- for (p = source_str + offset; n;) {
- int match = regexec(regex, p, MAX_SUB_MATCHES, pmatch, p == source_str ? 0 : REG_NOTBOL);
+ p = source_str + offset;
+ do {
+ int match = regexec(regex, p, MAX_SUB_MATCHES, pmatch, regexec_flags);
if (match >= REG_BADPAT) {
char buf[100];
@@ -483,12 +486,26 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
p += pmatch[0].rm_eo;
n -= pmatch[0].rm_eo;
- if (!opt_all || pmatch[0].rm_eo == 0 || pattern[0] == '^') {
- /* If we are doing a single match, or we haven't moved with this match
- * or this is an anchored match, we stop */
+ /* If -all is not specified, or there is no source left, we are done */
+ if (!opt_all || n == 0) {
break;
}
- }
+
+ /* An anchored pattern without -line must be done */
+ if ((regcomp_flags & REG_NEWLINE) == 0 && pattern[0] == '^') {
+ break;
+ }
+
+ /* If the pattern is empty, need to step forwards */
+ if (pattern[0] == '\0' && n) {
+ /* Need to copy the char we are moving over */
+ Jim_AppendString(interp, resultObj, p, 1);
+ p++;
+ n--;
+ }
+
+ regexec_flags |= REG_NOTBOL;
+ } while (n);
/*
* Copy the portion of the string after the last match to the