about summary refs log tree commit diff
diff options
context:
space:
mode:
author kosako <kosako@sofnec.co.jp> 2016-11-07 10:41:55 +0900
committer kosako <kosako@sofnec.co.jp> 2016-11-07 10:41:55 +0900
commit 8961d8ad42188043b255ee06fbf61f1292ef5ebd (patch)
tree 1256d7d214a97a3c130dc921323392a7f25c2c21
parent d5e9cc2de2431fbaa833a9202e3cc89661dd95fe (diff)
parent 700bbecaa2fa964a3b8099b72394cc5acb670768 (diff)
download oniguruma-6.1.2.zip
oniguruma-6.1.2.tar.gz
oniguruma-6.1.2.tar.bz2
merge from develop v6.1.2
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- HISTORY 10
-rw-r--r-- README.md 6
-rw-r--r-- configure.ac 2
-rw-r--r-- dist.info 2
-rw-r--r-- index.html 4
-rw-r--r-- index_ja.html 4
-rw-r--r-- src/big5.c 23
-rw-r--r-- src/euc_jp.c 34
-rw-r--r-- src/euc_kr.c 22
-rw-r--r-- src/euc_tw.c 37
-rw-r--r-- src/gb18030.c 38
-rw-r--r-- src/oniguruma.h 9
-rw-r--r-- src/regcomp.c 28
-rw-r--r-- src/regexec.c 22
-rw-r--r-- src/regparse.c 53
-rw-r--r-- src/sjis.c 31
17 files changed, 286 insertions, 41 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5523a5f..ac8ba7f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 2.8)
project(oniguruma C)
set(PACKAGE onig)
-set(PACKAGE_VERSION "6.1.1")
+set(PACKAGE_VERSION "6.1.2")
set(USE_COMBINATION_EXPLOSION_CHECK 0)
set(USE_CRNL_AS_LINE_TERMINATOR 0)
diff --git a/HISTORY b/HISTORY
index 21038f1..c59fe4b 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,5 +1,13 @@
History
+2016/11/07: Version 6.1.2
+
+2016/10/25: allow word bound, word begin and word end in look-behind.
+2016/10/19: add ONIG_OPTION_CHECK_VALIDITY_OF_STRING option.
+2016/10/16: fix use after free node.
+2016/10/10: fix memory leaks after parsing regexp error.
+2016/09/22: implement many of is_valid_mbc_string().
+
2016/09/02: Version 6.1.1
2016/08/31: fix segfault /W.?{888}{888}{888}\x00/ (found by libfuzzer)
@@ -1721,7 +1729,7 @@ History
2003/03/12: [spec] change named backref and subexp call format.
backref: \k<name>, call: \g<name> (thanks akr)
2003/03/11: [inst] add regparse.[ch] in win32/Makefile.
-2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't set
+2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't set,
then compile error in unset_addr_list_fix(). (thanks knu)
2003/03/10: [impl] divide regcomp.c to regcomp.c, regparse.c and regparse.h.
2003/03/10: [bug] should handle multi-byte code name in fetch_name().
diff --git a/README.md b/README.md
index a2c49cd..bfb41c7 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,12 @@ Supported character encodings:
* CP1251: contributed by Byte
+New feature of version 6.1.2
+--------------------------
+
+* allow word bound, word begin and word end in look-behind.
+* NEW option: ONIG_OPTION_CHECK_VALIDITY_OF_STRING
+
New feature of version 6.1
--------------------------
diff --git a/configure.ac b/configure.ac
index e7d8459..beeaf5a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT(onig, 6.1.1)
+AC_INIT(onig, 6.1.2)
AC_CONFIG_MACRO_DIR([m4])
diff --git a/dist.info b/dist.info
index 6ecfe65..8e8d1aa 100644
--- a/dist.info
+++ b/dist.info
@@ -1,7 +1,7 @@
--- This file is part of LuaDist project
name = "onig"
-version = "6.1.1"
+version = "6.1.2"
desc = "Oniguruma is a regular expressions library."
author = "K.Kosako"
diff --git a/index.html b/index.html
index 159d687..cf9177c 100644
--- a/index.html
+++ b/index.html
@@ -8,7 +8,7 @@
<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
<p>
-(c) K.Kosako, updated at: 2016/08/31
+(c) K.Kosako, updated at: 2016/11/07
</p>
<dl>
@@ -16,9 +16,9 @@
<dt><b>What's new</b>
</font>
<ul>
+<li>2016/11/07: Version 6.1.2 released.</li>
<li>2016/09/02: Version 6.1.1 released.</li>
<li>2016/08/29: Version 6.1.0 released.</li>
-<li>2016/05/09: Version 6.0.0 released.</li>
<li>2014/12/12: Version 5.9.6 released.</li>
</ul>
</dl>
diff --git a/index_ja.html b/index_ja.html
index 0918897..e11e0f5 100644
--- a/index_ja.html
+++ b/index_ja.html
@@ -8,7 +8,7 @@
<h1>鬼車</h1>
<p>
-(c) K.Kosako, 最終更新: 2016/08/31
+(c) K.Kosako, 最終更新: 2016/11/07
</p>
<dl>
@@ -16,9 +16,9 @@
<dt><b>更新情報</b>
</font>
<ul>
+<li>2016/11/07: Version 6.1.2 リリース</li>
<li>2016/09/02: Version 6.1.1 リリース</li>
<li>2016/08/29: Version 6.1.0 リリース</li>
-<li>2016/05/09: Version 6.0.0 リリース</li>
<li>2014/12/12: Version 5.9.6 リリース</li>
</ul>
</dl>
diff --git a/src/big5.c b/src/big5.c
index 3d44975..bc713ab 100644
--- a/src/big5.c
+++ b/src/big5.c
@@ -55,9 +55,28 @@ big5_mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_BIG5, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p < 0xa1) {
+ return FALSE;
+ }
+ else if (*p < 0xff) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x40) return FALSE;
+ if (*p > 0x7e && *p < 0xa1) return FALSE;
+ if (*p == 0xff) return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/euc_jp.c b/src/euc_jp.c
index 19422ce..3b54e95 100644
--- a/src/euc_jp.c
+++ b/src/euc_jp.c
@@ -57,9 +57,39 @@ mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_JP, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p > 0xa0) {
+ if (*p == 0xff) return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff) return FALSE;
+ p++;
+ }
+ else if (*p == 0x8e) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p > 0xdf) return FALSE;
+ p++;
+ }
+ else if (*p == 0x8f) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff) return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff) return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/euc_kr.c b/src/euc_kr.c
index 12803cd..450caf1 100644
--- a/src/euc_kr.c
+++ b/src/euc_kr.c
@@ -55,9 +55,27 @@ euckr_mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_KR, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p < 0xa1) {
+ return FALSE;
+ }
+ else if (*p < 0xff) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff)
+ return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/euc_tw.c b/src/euc_tw.c
index 4e07567..b3ee628 100644
--- a/src/euc_tw.c
+++ b/src/euc_tw.c
@@ -55,9 +55,42 @@ euctw_mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_TW, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p < 0xa1) {
+ if (*p == 0x8e) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p > 0xb0) return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff)
+ return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff)
+ return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+ else if (*p < 0xff) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff)
+ return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/gb18030.c b/src/gb18030.c
index 36fc3de..c8b5865 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -76,9 +76,43 @@ gb18030_mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_GB18030, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p == 0x80 || *p == 0xff) {
+ return FALSE;
+ }
+ else {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x40) {
+ if (*p < 0x30 || *p > 0x39)
+ return FALSE;
+
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x81 || *p == 0xff) return FALSE;
+
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x30 || *p > 0x39)
+ return FALSE;
+
+ p++;
+ }
+ else if (*p == 0x7f || *p == 0xff) {
+ return FALSE;
+ }
+ else {
+ p++;
+ }
+ }
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 5aa49f6..6090165 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -36,7 +36,7 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 1
-#define ONIGURUMA_VERSION_TEENY 1
+#define ONIGURUMA_VERSION_TEENY 2
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
@@ -364,7 +364,7 @@ int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
-int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end));
+int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
@@ -398,7 +398,8 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
-#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
+#define ONIG_OPTION_CHECK_VALIDITY_OF_STRING (ONIG_OPTION_POSIX_REGION << 1)
+#define ONIG_OPTION_MAXBIT ONIG_OPTION_CHECK_VALIDITY_OF_STRING /* limit */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
@@ -742,7 +743,7 @@ void onig_free P_((OnigRegex));
ONIG_EXTERN
void onig_free_body P_((OnigRegex));
ONIG_EXTERN
-int onig_scan(regex_t* reg, const UChar* str, const UChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg);
+int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg);
ONIG_EXTERN
int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
diff --git a/src/regcomp.c b/src/regcomp.c
index fb3de21..11ba1e7 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -1795,6 +1795,11 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
}
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
+ break;
+
default:
break;
}
@@ -1853,6 +1858,11 @@ renumber_by_map(Node* node, GroupNumRemap* map)
r = renumber_node_backref(node, map);
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = renumber_by_map(NANCHOR(node)->target, map);
+ break;
+
default:
break;
}
@@ -1884,6 +1894,11 @@ numbered_ref_check(Node* node)
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = numbered_ref_check(NANCHOR(node)->target);
+ break;
+
default:
break;
}
@@ -3875,9 +3890,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
#define ALLOWED_ANCHOR_IN_LB \
-( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
+
#define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
case ANCHOR_LOOK_BEHIND:
{
@@ -4712,6 +4728,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ANCHOR_END_BUF:
case ANCHOR_SEMI_END_BUF:
case ANCHOR_END_LINE:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
break;
@@ -4734,8 +4752,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
case ANCHOR_LOOK_BEHIND_NOT:
break;
}
@@ -4989,6 +5005,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
+ if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
+ reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
+
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
@@ -5252,6 +5271,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
UnsetAddrList uslist;
#endif
+ root = 0;
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
#ifdef ONIG_DEBUG
diff --git a/src/regexec.c b/src/regexec.c
index 3599b21..7e8d3d1 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -3111,6 +3111,13 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
r = 0;
if (r == 0) {
+ if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
+ if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
+ r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ goto end;
+ }
+ }
+
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
r = match_at(reg, str, end,
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
@@ -3119,6 +3126,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
at, prev, &msa);
}
+ end:
MATCH_ARG_FREE(msa);
return r;
}
@@ -3391,6 +3399,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (start > end || start < str) goto mismatch_no_msa;
+ if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
+ if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
+ r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ goto finish_no_msa;
+ }
+ }
+
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
@@ -3747,6 +3762,13 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,
int rs;
const UChar* start;
+ if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
+ if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+ ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
+ }
+
n = 0;
start = str;
while (1) {
diff --git a/src/regparse.c b/src/regparse.c
index 1106c0b..8f1d1cb 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -29,6 +29,10 @@
#include "regparse.h"
#include "st.h"
+#ifdef DEBUG_NODE_FREE
+#include <stdio.h>
+#endif
+
#define WARN_BUFSIZE 256
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
@@ -1003,13 +1007,16 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
return 0;
}
-
extern void
onig_node_free(Node* node)
{
start:
if (IS_NULL(node)) return ;
+#ifdef DEBUG_NODE_FREE
+ fprintf(stderr, "onig_node_free: %p\n", node);
+#endif
+
switch (NTYPE(node)) {
case NT_STR:
if (NSTR(node)->capa != 0 &&
@@ -1071,6 +1078,9 @@ node_new(void)
node = (Node* )xmalloc(sizeof(Node));
/* xmemset(node, 0, sizeof(Node)); */
+#ifdef DEBUG_NODE_FREE
+ fprintf(stderr, "node_new: %p\n", node);
+#endif
return node;
}
@@ -4318,7 +4328,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
CClassNode* acc;
r = parse_char_class(&anode, tok, &p, end, env);
- if (r != 0) goto cc_open_err;
+ if (r != 0) {
+ onig_node_free(anode);
+ goto cc_open_err;
+ }
acc = NCCLASS(anode);
r = or_cclass(cc, acc, env->enc);
@@ -4412,7 +4425,6 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
err:
if (cc != NCCLASS(*np))
bbuf_free(cc->mbuf);
- onig_node_free(*np);
return r;
}
@@ -4542,11 +4554,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
if (num < 0) {
- onig_node_free(*np);
return num;
}
else if (num >= (int )BIT_STATUS_BITS_NUM) {
- onig_node_free(*np);
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
}
NENCLOSE(*np)->regnum = num;
@@ -4614,7 +4624,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (r < 0) return r;
r = parse_subexp(&target, tok, term, &p, end, env);
env->option = prev;
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(target);
+ return r;
+ }
*np = node_new_option(option);
CHECK_NULL_RETURN_MEMERR(*np);
NENCLOSE(*np)->target = target;
@@ -4647,7 +4660,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_subexp(&target, tok, term, &p, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(target);
+ return r;
+ }
if (NTYPE(*np) == NT_ANCHOR)
NANCHOR(*np)->target = target;
@@ -4908,7 +4924,10 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (r < 0) return r;
r = parse_subexp(&target, tok, term, src, end, env);
env->option = prev;
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(target);
+ return r;
+ }
NENCLOSE(*np)->target = target;
return tok->type;
}
@@ -5220,7 +5239,10 @@ parse_branch(Node** top, OnigToken* tok, int term,
*top = NULL;
r = parse_exp(&node, tok, term, src, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(node);
+ return r;
+ }
if (r == TK_EOT || r == term || r == TK_ALT) {
*top = node;
@@ -5230,7 +5252,10 @@ parse_branch(Node** top, OnigToken* tok, int term,
headp = &(NCDR(*top));
while (r != TK_EOT && r != term && r != TK_ALT) {
r = parse_exp(&node, tok, term, src, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(node);
+ return r;
+ }
if (NTYPE(node) == NT_LIST) {
*headp = node;
@@ -5272,8 +5297,10 @@ parse_subexp(Node** top, OnigToken* tok, int term,
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_branch(&node, tok, term, src, end, env);
- if (r < 0) return r;
-
+ if (r < 0) {
+ onig_node_free(node);
+ return r;
+ }
*headp = onig_node_new_alt(node, NULL);
headp = &(NCDR(*headp));
}
@@ -5282,8 +5309,8 @@ parse_subexp(Node** top, OnigToken* tok, int term,
goto err;
}
else {
- err:
onig_node_free(node);
+ err:
if (term == TK_SUBEXP_CLOSE)
return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
else
diff --git a/src/sjis.c b/src/sjis.c
index a607b3d..3378474 100644
--- a/src/sjis.c
+++ b/src/sjis.c
@@ -77,9 +77,36 @@ mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_SJIS, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p < 0xa1) {
+ if (*p == 0xa0 || *p == 0x80)
+ return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x40 || *p > 0xfc || *p == 0x7f)
+ return FALSE;
+ p++;
+ }
+ else if (*p < 0xe0) {
+ p++;
+ }
+ else if (*p < 0xfd) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x40 || *p > 0xfc || *p == 0x7f)
+ return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static int