about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: K.Kosako <kkosako0@gmail.com> 2024-06-08 00:06:04 +0900
committer: K.Kosako <kkosako0@gmail.com> 2024-06-08 00:06:04 +0900
commit: 8860d245bb89115358a3dda861a0bb1b0d1f1c98 (patch)
tree: b7dde2360cc24a0754372a2ad746e6888bcde965
parent: f2f9c695e0ec4584f14e5df29c7df6e8ff38862a (diff)
download: oniguruma-skip_search.zip
oniguruma-skip_search.tar.gz
oniguruma-skip_search.tar.bz2
add new operator \J (skip search) [branch: skip_search]
-rw-r--r-- src/regexec.c 65
-rw-r--r-- src/regint.h 6
-rw-r--r-- src/regparse.c 27
3 files changed, 93 insertions, 5 deletions
diff --git a/src/regexec.c b/src/regexec.c
index 732f980..b527177 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2022 K.Kosako
+ * Copyright (c) 2002-2024 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -177,6 +177,9 @@ typedef struct {
#ifdef USE_CALL
unsigned long subexp_call_in_search_counter;
#endif
+#ifdef USE_SKIP_SEARCH
+ UChar* skip_search;
+#endif
} MatchArg;
@@ -1261,6 +1264,7 @@ struct OnigCalloutArgsStruct {
#endif
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#ifdef USE_SKIP_SEARCH
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option)|(reg)->options;\
@@ -1272,6 +1276,35 @@ struct OnigCalloutArgsStruct {
(msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
+ (msa).skip_search = (UChar* )(arg_start);\
+} while(0)
+#else
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option)|(reg)->options;\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+ (msa).match_stack_limit = (mpv)->match_stack_limit;\
+ RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
+ SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
+ (msa).mp = mpv;\
+ (msa).best_len = ONIG_MISMATCH;\
+ (msa).ptr_num = PTR_NUM_SIZE(reg);\
+} while(0)
+#endif
+#else
+#ifdef USE_SKIP_SEARCH
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option)|(reg)->options;\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+ (msa).match_stack_limit = (mpv)->match_stack_limit;\
+ RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
+ SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
+ (msa).mp = mpv;\
+ (msa).ptr_num = PTR_NUM_SIZE(reg);\
+ (msa).skip_search = (UChar* )(arg_start);\
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
@@ -1286,6 +1319,7 @@ struct OnigCalloutArgsStruct {
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#endif
+#endif
#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
@@ -4335,6 +4369,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case UPDATE_VAR_RIGHT_RANGE_INIT:
INIT_RIGHT_RANGE;
break;
+#ifdef USE_SKIP_SEARCH
+ case UPDATE_VAR_SKIP_SEARCH:
+ if (s > msa->skip_search) msa->skip_search = s;
+ break;
+#endif
}
}
INC_OP;
@@ -5629,6 +5668,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
while (s <= high) {
MATCH_AND_RETURN_CHECK(data_range);
s += enclen(reg->enc, s);
+#ifdef USE_SKIP_SEARCH
+ if (s < msa.skip_search) s = msa.skip_search;
+#endif
}
} while (s < range);
goto mismatch;
@@ -5646,10 +5688,18 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
prev = s;
s += enclen(reg->enc, s);
- while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
- prev = s;
- s += enclen(reg->enc, s);
+#ifdef USE_SKIP_SEARCH
+ if (s < msa.skip_search) s = msa.skip_search;
+ else {
+#endif
+ while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) &&
+ s < range) {
+ prev = s;
+ s += enclen(reg->enc, s);
+ }
+#ifdef USE_SKIP_SEARCH
}
+#endif
}
goto mismatch;
}
@@ -5660,6 +5710,13 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
MATCH_AND_RETURN_CHECK(data_range);
if (s >= range) break;
s += enclen(reg->enc, s);
+
+#ifdef USE_SKIP_SEARCH
+ if (s < msa.skip_search) {
+ s = msa.skip_search;
+ if (s > range) break;
+ }
+#endif
}
}
else { /* backward search */
diff --git a/src/regint.h b/src/regint.h
index 32018e3..fe098b8 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2023 K.Kosako
+ * Copyright (c) 2002-2024 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -62,6 +62,7 @@
#define USE_REGSET
#define USE_CALL
#define USE_CALLOUT
+#define USE_SKIP_SEARCH
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_WHOLE_OPTIONS
#define USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
@@ -584,6 +585,9 @@ enum UpdateVarType {
UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3,
UPDATE_VAR_RIGHT_RANGE_TO_S = 4,
UPDATE_VAR_RIGHT_RANGE_INIT = 5,
+#ifdef USE_SKIP_SEARCH
+ UPDATE_VAR_SKIP_SEARCH = 6,
+#endif
};
enum CheckPositionType {
diff --git a/src/regparse.c b/src/regparse.c
index 36b6dd1..9104172 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -2781,6 +2781,16 @@ node_new_keep(Node** node, ParseEnv* env)
return ONIG_NORMAL;
}
+#ifdef USE_SKIP_SEARCH
+static int
+node_new_skip_search(Node** node, ParseEnv* env)
+{
+ int r;
+ r = node_new_update_var_gimmick(node, UPDATE_VAR_SKIP_SEARCH, 0, env);
+ return r;
+}
+#endif
+
#ifdef USE_CALLOUT
extern void
@@ -4526,6 +4536,9 @@ enum TokenSyms {
TK_QUOTE_OPEN,
TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
TK_KEEP, /* \K */
+#ifdef USE_SKIP_SEARCH
+ TK_SKIP_SEARCH, /* \J */
+#endif
TK_GENERAL_NEWLINE, /* \R */
TK_NO_NEWLINE, /* \N */
TK_TRUE_ANYCHAR, /* \O */
@@ -5743,6 +5756,13 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env)
tok->type = TK_KEEP;
break;
+#ifdef USE_SKIP_SEARCH
+ case 'J':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
+ tok->type = TK_SKIP_SEARCH;
+ break;
+#endif
+
case 'R':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
tok->type = TK_GENERAL_NEWLINE;
@@ -9093,6 +9113,13 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (r < 0) return r;
break;
+#ifdef USE_SKIP_SEARCH
+ case TK_SKIP_SEARCH:
+ r = node_new_skip_search(np, env);
+ if (r < 0) return r;
+ break;
+#endif
+
case TK_GENERAL_NEWLINE:
r = node_new_general_newline(np, env);
if (r < 0) return r;