aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorK.Kosako <kkosako0@gmail.com>2020-03-21 17:51:23 +0900
committerK.Kosako <kkosako0@gmail.com>2020-03-21 22:37:42 +0900
commit27c789fd3bfda6bd44bf24ae8a7a4ebeb68d0b8c (patch)
treecfc62a64eb955ff8c02115f302f61e8870b90826
parentf8085587a4451c3399cba21a44d3f4098a5b9717 (diff)
downloadoniguruma-issue_187_python_wchar_t.zip
oniguruma-issue_187_python_wchar_t.tar.gz
oniguruma-issue_187_python_wchar_t.tar.bz2
add ONIG_ENCODING_UTF16_BE_WCHAR and ONIG_ENCODING_UTF16_LE_WCHARissue_187_python_wchar_t
-rw-r--r--src/oniguruma.h4
-rw-r--r--src/utf16_be.c297
-rw-r--r--src/utf16_le.c259
3 files changed, 558 insertions, 2 deletions
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 6b1397a..86cb542 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -184,6 +184,8 @@ ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
ONIG_EXTERN OnigEncodingType OnigEncodingCP1251;
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE_WCHAR;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE_WCHAR;
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
@@ -216,6 +218,8 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251)
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
+#define ONIG_ENCODING_UTF16_BE_WCHAR (&OnigEncodingUTF16_BE_WCHAR)
+#define ONIG_ENCODING_UTF16_LE_WCHAR (&OnigEncodingUTF16_LE_WCHAR)
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
diff --git a/src/utf16_be.c b/src/utf16_be.c
index d237b93..4861f48 100644
--- a/src/utf16_be.c
+++ b/src/utf16_be.c
@@ -2,7 +2,7 @@
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -274,3 +274,298 @@ OnigEncodingType OnigEncodingUTF16_BE = {
ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_2,
0, 0
};
+
+#ifdef SIZEOF_WCHAR_T
+#if SIZEOF_WCHAR_T == 2
+
+OnigEncodingType OnigEncodingUTF16_BE_WCHAR = {
+ utf16be_mbc_enc_len,
+ "UTF-16BE_WCHAR2", /* name */
+ 4, /* max enc length */
+ 2, /* min enc length */
+ utf16be_is_mbc_newline,
+ utf16be_mbc_to_code,
+ utf16be_code_to_mbclen,
+ utf16be_code_to_mbc,
+ utf16be_mbc_case_fold,
+ onigenc_unicode_apply_all_case_fold,
+ utf16be_get_case_fold_codes_by_str,
+ onigenc_unicode_property_name_to_ctype,
+ onigenc_unicode_is_code_ctype,
+ onigenc_utf16_32_get_ctype_code_range,
+ utf16be_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match,
+ init,
+ 0, /* is_initialized */
+ is_valid_mbc_string,
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_2,
+ 0, 0
+};
+
+#elif SIZEOF_WCHAR_T == 4
+
+static const int EncLen_UTF16_WCHAR4[] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
+};
+
+static int
+wchar4_init(void)
+{
+#ifdef USE_CALLOUT
+
+ int id;
+ OnigEncoding enc;
+ char* name;
+ unsigned int args[4];
+ OnigValue opts[4];
+
+ enc = ONIG_ENCODING_UTF16_BE;
+
+ name = "\000\000\000F\000\000\000A\000\000\000I\000\000\000L\000\000\000\000"; BC0_P(name, fail);
+ name = "\000\000\000M\000\000\000I\000\000\000S\000\000\000M\000\000\000A\000\000\000T\000\000\000C\000\000\000H\000\000\000\000"; BC0_P(name, mismatch);
+
+ name = "\000\000\000M\000\000\000A\000\000\000X\000\000\000\000";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_CHAR;
+ opts[0].c = 'X';
+ BC_B_O(name, max, 2, args, 1, opts);
+
+ name = "\000\000\000E\000\000\000R\000\000\000R\000\000\000O\000\000\000R\000\000\000\000";
+ args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
+ BC_P_O(name, error, 1, args, 1, opts);
+
+ name = "\000\000\000C\000\000\000O\000\000\000U\000\000\000N\000\000\000T\000\000\000\000";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, count, 1, args, 1, opts);
+
+ name = "\000\000\000T\000\000\000O\000\000\000T\000\000\000A\000\000\000L\000\000\000_\000\000\000C\000\000\000O\000\000\000U\000\000\000N\000\000\000T\000\000\000\000";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, total_count, 1, args, 1, opts);
+
+ name = "\000\000\000C\000\000\000M\000\000\000P\000\000\000\000";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_STRING;
+ args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ BC_P(name, cmp, 3, args);
+
+#endif /* USE_CALLOUT */
+
+ return ONIG_NORMAL;
+}
+
+static int
+wchar4_mbc_enc_len(const UChar* p)
+{
+ return EncLen_UTF16_WCHAR4[*(p+2)];
+}
+
+static int
+wchar4_is_valid_mbc_string(const UChar* s, const UChar* end)
+{
+ while (s + 3 < end) {
+ int len = utf16be_mbc_enc_len(s);
+ if (len == 8) {
+ if (s + 8 > end)
+ return FALSE;
+ if (! UTF16_IS_SURROGATE_SECOND(*(s+6)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*(s+2)))
+ return FALSE;
+
+ s += len;
+ }
+
+ if (s != end)
+ return FALSE;
+ else
+ return TRUE;
+}
+
+static int
+wchar4_is_mbc_newline(const UChar* p, const UChar* end)
+{
+ if (p + 3 < end) {
+ if (*(p+3) == NEWLINE_CODE && *(p+2) == 0x00)
+ return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((
+#ifndef USE_CRNL_AS_LINE_TERMINATOR
+ *(p+3) == 0x0d ||
+#endif
+ *(p+3) == 0x85) && *(p+2) == 0x00)
+ return 1;
+
+ if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28))
+ return 1;
+#endif
+ }
+ return 0;
+}
+
+static OnigCodePoint
+wchar4_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
+{
+ OnigCodePoint code;
+
+ if (UTF16_IS_SURROGATE_FIRST(*(p+2))) {
+ code = ((((p[2] - 0xd8) << 2) + ((p[3] & 0xc0) >> 6) + 1) << 16)
+ + ((((p[3] & 0x3f) << 2) + (p[6] - 0xdc)) << 8)
+ + p[7];
+ }
+ else {
+ code = p[2] * 256 + p[3];
+ }
+ return code;
+}
+
+static int
+wchar4_code_to_mbclen(OnigCodePoint code)
+{
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 8;
+ }
+ else {
+ return 4;
+ }
+}
+
+static int
+wchar4_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar* p = buf;
+
+ if (code > 0xffff) {
+ unsigned int plane, high;
+
+ plane = (code >> 16) - 1;
+ high = (code & 0xff00) >> 8;
+
+ *p++ = 0x00;
+ *p++ = 0x00;
+ *p++ = (plane >> 2) + 0xd8;
+ *p++ = ((plane & 0x03) << 6) + (high >> 2);
+
+ *p++ = 0x00;
+ *p++ = 0x00;
+ *p++ = (high & 0x03) + 0xdc;
+ *p = (UChar )(code & 0xff);
+ return 8;
+ }
+ else {
+ *p++ = 0x00;
+ *p++ = 0x00;
+ *p++ = (UChar )((code & 0xff00) >> 8);
+ *p++ = (UChar )(code & 0xff);
+ return 4;
+ }
+}
+
+static int
+wchar4_mbc_case_fold(OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end, UChar* fold)
+{
+ const UChar* p = *pp;
+
+ if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0) {
+ p++;
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (*(p+3) == 0x49) {
+ *fold++ = 0x00;
+ *fold++ = 0x00;
+ *fold++ = 0x01;
+ *fold = 0x31;
+ (*pp) += 4;
+ return 4;
+ }
+ }
+#endif
+
+ *fold++ = 0x00;
+ *fold++ = 0x00;
+ *fold++ = 0x00;
+ *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ *pp += 4;
+ return 4;
+ }
+ else
+ return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE_WCHAR, flag,
+ pp, end, fold);
+}
+
+static int
+wchar4_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+ const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+ return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE_WCHAR, flag, p, end, items);
+}
+
+static UChar*
+wchar4_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ int n;
+
+ if (s <= start) return (UChar* )s;
+
+ n = (s - start) % 4;
+ if (n != 0) {
+ s -= n;
+ }
+
+ if (UTF16_IS_SURROGATE_SECOND(*(s+2)) && s > start + 3 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-2)))
+ s -= 4;
+
+ return (UChar* )s;
+}
+
+
+OnigEncodingType OnigEncodingUTF16_BE_WCHAR = {
+ wchar4_mbc_enc_len,
+ "UTF-16BE_WCHAR4", /* name */
+ 8, /* max enc length */
+ 4, /* min enc length */
+ wchar4_is_mbc_newline,
+ wchar4_mbc_to_code,
+ wchar4_code_to_mbclen,
+ wchar4_code_to_mbc,
+ wchar4_mbc_case_fold,
+ onigenc_unicode_apply_all_case_fold,
+ wchar4_get_case_fold_codes_by_str,
+ onigenc_unicode_property_name_to_ctype,
+ onigenc_unicode_is_code_ctype,
+ onigenc_utf16_32_get_ctype_code_range,
+ wchar4_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match,
+ wchar4_init,
+ 0, /* is_initialized */
+ wchar4_is_valid_mbc_string,
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_3,
+ 0, 0
+};
+
+#else
+ #error "Not supported SIZEOF_WCHAR_T"
+#endif
+#endif /* ifdef SIZEOF_WCHAR_T */
diff --git a/src/utf16_le.c b/src/utf16_le.c
index f14d263..195eff7 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -274,3 +274,260 @@ OnigEncodingType OnigEncodingUTF16_LE = {
ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
+
+
+#ifdef SIZEOF_WCHAR_T
+#if SIZEOF_WCHAR_T == 2
+
+OnigEncodingType OnigEncodingUTF16_LE_WCHAR = {
+ utf16le_mbc_enc_len,
+ "UTF-16LE_WCHAR2", /* name */
+ 4, /* max enc length */
+ 2, /* min enc length */
+ utf16le_is_mbc_newline,
+ utf16le_mbc_to_code,
+ utf16le_code_to_mbclen,
+ utf16le_code_to_mbc,
+ utf16le_mbc_case_fold,
+ onigenc_unicode_apply_all_case_fold,
+ utf16le_get_case_fold_codes_by_str,
+ onigenc_unicode_property_name_to_ctype,
+ onigenc_unicode_is_code_ctype,
+ onigenc_utf16_32_get_ctype_code_range,
+ utf16le_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match,
+ init,
+ 0, /* is_initialized */
+ is_valid_mbc_string,
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,
+ 0, 0
+};
+
+#elif SIZEOF_WCHAR_T == 4
+
+static const int EncLen_UTF16_WCHAR4[] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
+};
+
+static int
+wchar4_mbc_enc_len(const UChar* p)
+{
+ return EncLen_UTF16_WCHAR4[*(p+1)];
+}
+
+static int
+wchar4_init(void)
+{
+#ifdef USE_CALLOUT
+
+ int id;
+ OnigEncoding enc;
+ char* name;
+ unsigned int args[4];
+ OnigValue opts[4];
+
+ enc = ONIG_ENCODING_UTF16_LE;
+
+ name = "F\000\000\000A\000\000\000I\000\000\000L\000\000\000\000\000\000\000"; BC0_P(name, fail);
+ name = "M\000\000\000I\000\000\000S\000\000\000M\000\000\000A\000\000\000T\000\000\000C\000\000\000H\000\000\000\000\000\000\000"; BC0_P(name, mismatch);
+
+ name = "M\000\000\000A\000\000\000X\000\000\000\000\000\000\000";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_CHAR;
+ opts[0].c = 'X';
+ BC_B_O(name, max, 2, args, 1, opts);
+
+ name = "E\000\000\000R\000\000\000R\000\000\000O\000\000\000R\000\000\000\000\000\000\000";
+ args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
+ BC_P_O(name, error, 1, args, 1, opts);
+
+ name = "C\000\000\000O\000\000\000U\000\000\000N\000\000\000T\000\000\000\000\000\000\000";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, count, 1, args, 1, opts);
+
+ name = "T\000\000\000O\000\000\000T\000\000\000A\000\000\000L\000\000\000_\000\000\000C\000\000\000O\000\000\000U\000\000\000N\000\000\000T\000\000\000\000\000\000\000";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, total_count, 1, args, 1, opts);
+
+ name = "C\000\000\000M\000\000\000P\000\000\000\000\000\000\000";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_STRING;
+ args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ BC_P(name, cmp, 3, args);
+
+#endif /* USE_CALLOUT */
+
+ return ONIG_NORMAL;
+}
+
+static int
+wchar4_is_valid_mbc_string(const UChar* p, const UChar* end)
+{
+ const UChar* end1 = end - 1;
+
+ while (p < end1) {
+ int len = wchar4_mbc_enc_len(p);
+ if (len == 8) {
+ if (p + 7 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 5)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
+ return FALSE;
+
+ p += len;
+ }
+
+ if (p != end)
+ return FALSE;
+ else
+ return TRUE;
+}
+
+static int
+wchar4_code_to_mbclen(OnigCodePoint code)
+{
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 8;
+ }
+ else {
+ return 4;
+ }
+}
+
+static int
+wchar4_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar* p = buf;
+
+ if (code > 0xffff) {
+ unsigned int plane, high;
+
+ plane = (code >> 16) - 1;
+ high = (code & 0xff00) >> 8;
+
+ *p++ = ((plane & 0x03) << 6) + (high >> 2);
+ *p++ = (plane >> 2) + 0xd8;
+ *p++ = 0x00;
+ *p++ = 0x00;
+ *p++ = (UChar )(code & 0xff);
+ *p++ = (high & 0x03) + 0xdc;
+ *p++ = 0x00;
+ *p = 0x00;
+ return 8;
+ }
+ else {
+ *p++ = (UChar )(code & 0xff);
+ *p++ = (UChar )((code & 0xff00) >> 8);
+ *p++ = 0x00;
+ *p = 0x00;
+ return 4;
+ }
+}
+
+static int
+wchar4_mbc_case_fold(OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end, UChar* fold)
+{
+ const UChar* p = *pp;
+
+ if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) {
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (*p == 0x49) {
+ *fold++ = 0x31;
+ *fold++ = 0x01;
+ *fold++ = 0x00;
+ *fold = 0x00;
+ (*pp) += 4;
+ return 4;
+ }
+ }
+#endif
+
+ *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ *fold++ = 0x00;
+ *fold++ = 0x00;
+ *fold = 0x00;
+ *pp += 4;
+ return 4;
+ }
+ else
+ return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE_WCHAR, flag,
+ pp, end, fold);
+}
+
+static int
+wchar4_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+ const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+ return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE_WCHAR,
+ flag, p, end, items);
+}
+
+static UChar*
+wchar4_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ int n;
+
+ if (s <= start) return (UChar* )s;
+
+ n = (s - start) % 4;
+ if (n != 0) {
+ s -= n;
+ }
+
+ if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 3 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-3)))
+ s -= 4;
+
+ return (UChar* )s;
+}
+
+
+OnigEncodingType OnigEncodingUTF16_LE_WCHAR = {
+ wchar4_mbc_enc_len,
+ "UTF-16LE_WCHAR4", /* name */
+ 8, /* max enc length */
+ 4, /* min enc length */
+ utf16le_is_mbc_newline,
+ utf16le_mbc_to_code,
+ wchar4_code_to_mbclen,
+ wchar4_code_to_mbc,
+ wchar4_mbc_case_fold,
+ onigenc_unicode_apply_all_case_fold,
+ wchar4_get_case_fold_codes_by_str,
+ onigenc_unicode_property_name_to_ctype,
+ onigenc_unicode_is_code_ctype,
+ onigenc_utf16_32_get_ctype_code_range,
+ wchar4_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match,
+ wchar4_init,
+ 0, /* is_initialized */
+ wchar4_is_valid_mbc_string,
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,
+ 0, 0
+};
+
+#else
+ #error "Not supported SIZEOF_WCHAR_T"
+#endif
+#endif /* ifdef SIZEOF_WCHAR_T */