From 2c747f537fbf2d44d5fab7f93c4b4ece9a4c883e Mon Sep 17 00:00:00 2001 From: "K.Kosako" Date: Wed, 5 Jun 2024 22:43:24 +0900 Subject: add new behavior ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC (#298) --- src/oniguruma.h | 3 ++- src/regparse.c | 15 ++++++++++----- src/regsyntax.c | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/oniguruma.h b/src/oniguruma.h index 66da319..521d89b 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2022 K.Kosako + * Copyright (c) 2002-2024 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -541,6 +541,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) #define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ #define ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (1U<<26) +#define ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC (1U<<27) /* [\w-%]=[\w\-%] */ /* syntax (behavior) warning */ #define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ #define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ diff --git a/src/regparse.c b/src/regparse.c index 3f764cb..36b6dd1 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -7092,11 +7092,16 @@ prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ParseEnv* env) goto val_entry; } else if (r == TK_CC_AND) { + range_end_val_with_warning: CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } if (curr_type == CV_CPROP) { + if (IS_SYNTAX_BV(env->syntax, + ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC)) { + goto range_end_val_with_warning; + } r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; goto err; } @@ -7127,16 +7132,16 @@ prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ParseEnv* env) if (r < 0) goto err; fetched = 1; - if (r == TK_CC_CLOSE) + if (r == TK_CC_CLOSE) { goto range_end_val; /* allow [a-b-] */ + } else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; + goto range_end_val_with_warning; } if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ + /* [0-9-a] is allowed as [0-9\-a] */ + goto range_end_val_with_warning; } r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; goto err; diff --git a/src/regsyntax.c b/src/regsyntax.c index 1a57f55..ae160a5 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -155,7 +155,8 @@ OnigSyntaxType OnigSyntaxJava = { ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) , ( SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | - ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND ) + ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND | + ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC ) , ONIG_OPTION_SINGLELINE , { @@ -188,7 +189,8 @@ OnigSyntaxType OnigSyntaxPerl = { ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT ) - , SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH + , (SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH | + ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC ) , ONIG_OPTION_SINGLELINE , { -- cgit v1.1