From 8860d245bb89115358a3dda861a0bb1b0d1f1c98 Mon Sep 17 00:00:00 2001 From: "K.Kosako" Date: Sat, 8 Jun 2024 00:06:04 +0900 Subject: add new operator \J (skip search) --- src/regexec.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- src/regint.h | 6 +++++- src/regparse.c | 27 ++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 5 deletions(-) diff --git a/src/regexec.c b/src/regexec.c index 732f980..b527177 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2022 K.Kosako + * Copyright (c) 2002-2024 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -177,6 +177,9 @@ typedef struct { #ifdef USE_CALL unsigned long subexp_call_in_search_counter; #endif +#ifdef USE_SKIP_SEARCH + UChar* skip_search; +#endif } MatchArg; @@ -1261,6 +1264,7 @@ struct OnigCalloutArgsStruct { #endif #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#ifdef USE_SKIP_SEARCH #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option)|(reg)->options;\ @@ -1272,6 +1276,35 @@ struct OnigCalloutArgsStruct { (msa).mp = mpv;\ (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ + (msa).skip_search = (UChar* )(arg_start);\ +} while(0) +#else +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option)|(reg)->options;\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).match_stack_limit = (mpv)->match_stack_limit;\ + RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ + SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ + (msa).mp = mpv;\ + (msa).best_len = ONIG_MISMATCH;\ + (msa).ptr_num = PTR_NUM_SIZE(reg);\ +} while(0) +#endif +#else +#ifdef USE_SKIP_SEARCH +#define MATCH_ARG_INIT(msa, reg, arg_option, 
arg_region, arg_start, mpv) do { \ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option)|(reg)->options;\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).match_stack_limit = (mpv)->match_stack_limit;\ + RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ + SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ + (msa).mp = mpv;\ + (msa).ptr_num = PTR_NUM_SIZE(reg);\ + (msa).skip_search = (UChar* )(arg_start);\ } while(0) #else #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \ @@ -1286,6 +1319,7 @@ struct OnigCalloutArgsStruct { (msa).ptr_num = PTR_NUM_SIZE(reg);\ } while(0) #endif +#endif #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) @@ -4335,6 +4369,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case UPDATE_VAR_RIGHT_RANGE_INIT: INIT_RIGHT_RANGE; break; +#ifdef USE_SKIP_SEARCH + case UPDATE_VAR_SKIP_SEARCH: + if (s > msa->skip_search) msa->skip_search = s; + break; +#endif } } INC_OP; @@ -5629,6 +5668,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, while (s <= high) { MATCH_AND_RETURN_CHECK(data_range); s += enclen(reg->enc, s); +#ifdef USE_SKIP_SEARCH + if (s < msa.skip_search) s = msa.skip_search; +#endif } } while (s < range); goto mismatch; @@ -5646,10 +5688,18 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, prev = s; s += enclen(reg->enc, s); - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); +#ifdef USE_SKIP_SEARCH + if (s < msa.skip_search) s = msa.skip_search; + else { +#endif + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && + s < range) { + prev = s; + s += enclen(reg->enc, s); + } +#ifdef USE_SKIP_SEARCH } +#endif } goto mismatch; } @@ -5660,6 +5710,13 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, MATCH_AND_RETURN_CHECK(data_range); if (s >= range) break; s += enclen(reg->enc, s); + +#ifdef USE_SKIP_SEARCH + if (s < msa.skip_search) { + s = msa.skip_search; 
+ if (s > range) break; + } +#endif } } else { /* backward search */ diff --git a/src/regint.h b/src/regint.h index 32018e3..fe098b8 100644 --- a/src/regint.h +++ b/src/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2023 K.Kosako + * Copyright (c) 2002-2024 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -62,6 +62,7 @@ #define USE_REGSET #define USE_CALL #define USE_CALLOUT +#define USE_SKIP_SEARCH #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ #define USE_WHOLE_OPTIONS #define USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */ @@ -584,6 +585,9 @@ enum UpdateVarType { UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3, UPDATE_VAR_RIGHT_RANGE_TO_S = 4, UPDATE_VAR_RIGHT_RANGE_INIT = 5, +#ifdef USE_SKIP_SEARCH + UPDATE_VAR_SKIP_SEARCH = 6, +#endif }; enum CheckPositionType { diff --git a/src/regparse.c b/src/regparse.c index 36b6dd1..9104172 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -2781,6 +2781,16 @@ node_new_keep(Node** node, ParseEnv* env) return ONIG_NORMAL; } +#ifdef USE_SKIP_SEARCH +static int +node_new_skip_search(Node** node, ParseEnv* env) +{ + int r; + r = node_new_update_var_gimmick(node, UPDATE_VAR_SKIP_SEARCH, 0, env); + return r; +} +#endif + #ifdef USE_CALLOUT extern void @@ -4526,6 +4536,9 @@ enum TokenSyms { TK_QUOTE_OPEN, TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ TK_KEEP, /* \K */ +#ifdef USE_SKIP_SEARCH + TK_SKIP_SEARCH, /* \J */ +#endif TK_GENERAL_NEWLINE, /* \R */ TK_NO_NEWLINE, /* \N */ TK_TRUE_ANYCHAR, /* \O */ @@ -5743,6 +5756,13 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env) tok->type = TK_KEEP; break; +#ifdef USE_SKIP_SEARCH + case 'J': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break; + tok->type = TK_SKIP_SEARCH; + break; +#endif + case 'R': if (! 
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break; tok->type = TK_GENERAL_NEWLINE; @@ -9093,6 +9113,13 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (r < 0) return r; break; +#ifdef USE_SKIP_SEARCH + case TK_SKIP_SEARCH: + r = node_new_skip_search(np, env); + if (r < 0) return r; + break; +#endif + case TK_GENERAL_NEWLINE: r = node_new_general_newline(np, env); if (r < 0) return r; -- cgit v1.1