From 2640194c73ef4853a7e7c74132b847133813cdaa Mon Sep 17 00:00:00 2001 From: "K.Kosako" Date: Sat, 15 Jun 2024 20:49:54 +0900 Subject: add new callout (*SKIP) #299 --- src/ascii.c | 5 +++- src/oniguruma.h | 2 ++ src/regexec.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- src/regint.h | 3 ++- 4 files changed, 75 insertions(+), 6 deletions(-) diff --git a/src/ascii.c b/src/ascii.c index f2dc0d3..4d59092 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -2,7 +2,7 @@ ascii.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2024 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,6 +44,9 @@ init(void) name = "FAIL"; BC0_P(name, fail); name = "MISMATCH"; BC0_P(name, mismatch); +#ifdef USE_SKIP_SEARCH + name = "SKIP"; BC0_P(name, skip); +#endif name = "MAX"; args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; diff --git a/src/oniguruma.h b/src/oniguruma.h index 521d89b..513a651 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -1074,6 +1074,8 @@ int onig_builtin_mismatch P_((OnigCalloutArgs* args, void* user_data)); ONIG_EXTERN int onig_builtin_error P_((OnigCalloutArgs* args, void* user_data)); ONIG_EXTERN +int onig_builtin_skip P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN int onig_builtin_count P_((OnigCalloutArgs* args, void* user_data)); ONIG_EXTERN int onig_builtin_total_count P_((OnigCalloutArgs* args, void* user_data)); diff --git a/src/regexec.c b/src/regexec.c index 732f980..3afe724 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2022 K.Kosako + * Copyright (c) 2002-2024 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -177,6 +177,9 @@ typedef struct { #ifdef USE_CALL unsigned long subexp_call_in_search_counter; #endif +#ifdef USE_SKIP_SEARCH + UChar* skip_search; +#endif } MatchArg; @@ -1261,6 +1264,7 @@ struct OnigCalloutArgsStruct { #endif #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#ifdef USE_SKIP_SEARCH #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option)|(reg)->options;\ @@ -1272,6 +1276,7 @@ struct OnigCalloutArgsStruct { (msa).mp = mpv;\ (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ + (msa).skip_search = (UChar* )(arg_start);\ } while(0) #else #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \ @@ -1283,9 +1288,38 @@ struct OnigCalloutArgsStruct { RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ (msa).mp = mpv;\ + (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ } while(0) #endif +#else +#ifdef USE_SKIP_SEARCH +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option)|(reg)->options;\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).match_stack_limit = (mpv)->match_stack_limit;\ + RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ + SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ + (msa).mp = mpv;\ + (msa).ptr_num = PTR_NUM_SIZE(reg);\ + (msa).skip_search = (UChar* )(arg_start);\ +} while(0) +#else +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option)|(reg)->options;\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).match_stack_limit = (mpv)->match_stack_limit;\ + RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ + SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ + (msa).mp = mpv;\ + (msa).ptr_num = PTR_NUM_SIZE(reg);\ +} while(0) +#endif +#endif #define 
MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) @@ -5629,6 +5663,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, while (s <= high) { MATCH_AND_RETURN_CHECK(data_range); s += enclen(reg->enc, s); +#ifdef USE_SKIP_SEARCH + if (s < msa.skip_search) s = msa.skip_search; +#endif } } while (s < range); goto mismatch; @@ -5646,10 +5683,18 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, prev = s; s += enclen(reg->enc, s); - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); +#ifdef USE_SKIP_SEARCH + if (s < msa.skip_search) s = msa.skip_search; + else { +#endif + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && + s < range) { + prev = s; + s += enclen(reg->enc, s); + } +#ifdef USE_SKIP_SEARCH } +#endif } goto mismatch; } @@ -5660,6 +5705,13 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, MATCH_AND_RETURN_CHECK(data_range); if (s >= range) break; s += enclen(reg->enc, s); + +#ifdef USE_SKIP_SEARCH + if (s < msa.skip_search) { + s = msa.skip_search; + if (s > range) break; + } +#endif } } else { /* backward search */ @@ -6368,6 +6420,17 @@ onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED) return n; } +#ifdef USE_SKIP_SEARCH +extern int +onig_builtin_skip(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + if (args->current > args->msa->skip_search) + args->msa->skip_search = (UChar* )args->current; + + return ONIG_NORMAL; +} +#endif + extern int onig_builtin_count(OnigCalloutArgs* args, void* user_data) { diff --git a/src/regint.h b/src/regint.h index 32018e3..b7a89a6 100644 --- a/src/regint.h +++ b/src/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2023 K.Kosako + * Copyright (c) 2002-2024 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -62,6 +62,7 @@ #define USE_REGSET #define USE_CALL #define USE_CALLOUT +#define USE_SKIP_SEARCH #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ #define USE_WHOLE_OPTIONS #define USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */ -- cgit v1.1