diff options
author | Ulrich Drepper <drepper@redhat.com> | 2000-09-26 00:05:52 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2000-09-26 00:05:52 +0000 |
commit | 0b95971d92a07bdc4719f41fbbb23cfc8decb2f7 (patch) | |
tree | bcea861005746afbc240e24884a73ebd4d51792e /iconvdata | |
parent | a2aa7df3d6bf73cda977ee050a503a7f7a78a82d (diff) | |
download | glibc-0b95971d92a07bdc4719f41fbbb23cfc8decb2f7.zip glibc-0b95971d92a07bdc4719f41fbbb23cfc8decb2f7.tar.gz glibc-0b95971d92a07bdc4719f41fbbb23cfc8decb2f7.tar.bz2 |
Update.
2000-09-23 Bruno Haible <haible@clisp.cons.org>
* iconvdata/gbk.c (USE_PRIVATE_AREA): Define to 0.
(__gbk_to_ucs): Conditionalize private area mappings.
(__gbk_from_ucs4_tab9): Likewise.
(BODY for TO_LOOP): Likewise.
* iconvdata/testdata/GBK: Don't use characters not yet in Unicode.
* iconvdata/testdata/GBK..UTF8: Likewise.
* iconvdata/tst-tables.sh: Enable GBK test.
2000-09-23 Bruno Haible <haible@clisp.cons.org>
* iconvdata/gbk.c (__gbk_to_ucs): Swap U+2014 and U+2015.
(__gbk_from_ucs4_tab4): Swap entries for U+2014 and U+2015.
(BODY for FROM_LOOP): Reject input > 0xFEA0 to avoid out-of-bounds
array access.
* iconvdata/gbgbk.c (BODY for FROM_LOOP): Map 0xA844 to 0xA1AA.
* iconvdata/testdata/GBK..UTF8: Swap U+2014 and U+2015.
2000-09-23 Bruno Haible <haible@clisp.cons.org>
* iconvdata/johab.c (final_to_ucs): Fix typos.
(jamo_from_ucs_table): Likewise.
(BODY for FROM_LOOP): Map 0x5c to U+20A9. Reject ranges
0xD9E6..0xD9FE and 0xDEF2..0xDEFE.
(BODY for TO_LOOP): Map U+20A9 to 0x5c. Don't produce values in
the range 0xD9E6..0xD9FE.
* iconvdata/tst-tables.sh: Enable JOHAB testing.
Diffstat (limited to 'iconvdata')
-rw-r--r-- | iconvdata/gbk.c | 33 | ||||
-rw-r--r-- | iconvdata/testdata/GBK | 18 | ||||
-rw-r--r-- | iconvdata/testdata/GBK..UTF8 | 18 | ||||
-rwxr-xr-x | iconvdata/tst-tables.sh | 2 |
4 files changed, 47 insertions, 24 deletions
diff --git a/iconvdata/gbk.c b/iconvdata/gbk.c index f9a53ff..b07f392 100644 --- a/iconvdata/gbk.c +++ b/iconvdata/gbk.c @@ -26,6 +26,12 @@ #include <wchar.h> #include <assert.h> +/* Unicode 3.0.1 does not contain all the characters in GBK. Define + USE_PRIVATE_AREA to 1 in order to use mappings from/to the Unicode + Private Use area. Until we see other systems using the same mappings, + it is disabled. */ +#define USE_PRIVATE_AREA 0 + /* The conversion table to UCS4 has almost no holes. It can be generated with: perl tab.pl < gbk.txt @@ -1739,7 +1745,13 @@ static const uint16_t __gbk_to_ucs[] = [0x1db0] = 0x00f2, [0x1db1] = 0x016b, [0x1db2] = 0x00fa, [0x1db3] = 0x01d4, [0x1db4] = 0x00f9, [0x1db5] = 0x01d6, [0x1db6] = 0x01d8, [0x1db7] = 0x01da, [0x1db8] = 0x01dc, [0x1db9] = 0x00fc, [0x1dba] = 0x00ea, [0x1dbb] = 0x0251, - [0x1dbc] = 0xe7c7, [0x1dbd] = 0x0144, [0x1dbe] = 0x0148, [0x1dbf] = 0xe7c8, +#if USE_PRIVATE_AREA + [0x1dbc] = 0xe7c7, +#endif + [0x1dbd] = 0x0144, [0x1dbe] = 0x0148, +#if USE_PRIVATE_AREA + [0x1dbf] = 0xe7c8, +#endif [0x1dc0] = 0x0261, [0x1dc5] = 0x3105, [0x1dc6] = 0x3106, [0x1dc7] = 0x3107, [0x1dc8] = 0x3108, [0x1dc9] = 0x3109, [0x1dca] = 0x310a, [0x1dcb] = 0x310b, [0x1dcc] = 0x310c, [0x1dcd] = 0x310d, [0x1dce] = 0x310e, [0x1dcf] = 0x310f, @@ -1766,10 +1778,14 @@ static const uint16_t __gbk_to_ucs[] = [0x1e3b] = 0xfe5e, [0x1e3c] = 0xfe5f, [0x1e3d] = 0xfe60, [0x1e3e] = 0xfe61, [0x1e40] = 0xfe62, [0x1e41] = 0xfe63, [0x1e42] = 0xfe64, [0x1e43] = 0xfe65, [0x1e44] = 0xfe66, [0x1e45] = 0xfe68, [0x1e46] = 0xfe69, [0x1e47] = 0xfe6a, - [0x1e48] = 0xfe6b, [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9, + [0x1e48] = 0xfe6b, +#if USE_PRIVATE_AREA + [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9, [0x1e4c] = 0xe7ea, [0x1e4d] = 0xe7eb, [0x1e4e] = 0xe7ec, [0x1e4f] = 0xe7ed, [0x1e50] = 0xe7ee, [0x1e51] = 0xe7ef, [0x1e52] = 0xe7f0, [0x1e53] = 0xe7f1, - [0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3, [0x1e56] = 0x3007, [0x1e64] = 0x2500, + [0x1e54] = 
0xe7f2, [0x1e55] = 0xe7f3, +#endif + [0x1e56] = 0x3007, [0x1e64] = 0x2500, [0x1e65] = 0x2501, [0x1e66] = 0x2502, [0x1e67] = 0x2503, [0x1e68] = 0x2504, [0x1e69] = 0x2505, [0x1e6a] = 0x2506, [0x1e6b] = 0x2507, [0x1e6c] = 0x2508, [0x1e6d] = 0x2509, [0x1e6e] = 0x250a, [0x1e6f] = 0x250b, [0x1e70] = 0x250c, @@ -5499,7 +5515,9 @@ static const uint16_t __gbk_to_ucs[] = [0x5dc2] = 0xfa0e, [0x5dc3] = 0xfa0f, [0x5dc4] = 0xfa11, [0x5dc5] = 0xfa13, [0x5dc6] = 0xfa14, [0x5dc7] = 0xfa18, [0x5dc8] = 0xfa1f, [0x5dc9] = 0xfa20, [0x5dca] = 0xfa21, [0x5dcb] = 0xfa23, [0x5dcc] = 0xfa24, [0x5dcd] = 0xfa27, - [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, [0x5dd0] = 0xe815, [0x5dd1] = 0xe816, + [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, +#if USE_PRIVATE_AREA + [0x5dd0] = 0xe815, [0x5dd1] = 0xe816, [0x5dd2] = 0xe817, [0x5dd3] = 0xe818, [0x5dd4] = 0xe819, [0x5dd5] = 0xe81a, [0x5dd6] = 0xe81b, [0x5dd7] = 0xe81c, [0x5dd8] = 0xe81d, [0x5dd9] = 0xe81e, [0x5dda] = 0xe81f, [0x5ddb] = 0xe820, [0x5ddc] = 0xe821, [0x5ddd] = 0xe822, @@ -5520,6 +5538,9 @@ static const uint16_t __gbk_to_ucs[] = [0x5e17] = 0xe85b, [0x5e18] = 0xe85c, [0x5e19] = 0xe85d, [0x5e1a] = 0xe85e, [0x5e1b] = 0xe85f, [0x5e1c] = 0xe860, [0x5e1d] = 0xe861, [0x5e1e] = 0xe862, [0x5e1f] = 0xe863, [0x5e20] = 0xe864, +#else + [0x5e20] = 0x0000, +#endif }; /* The table can be created using @@ -12936,6 +12957,7 @@ static const char __gbk_from_ucs4_tab8[][2] = */ static const char __gbk_from_ucs4_tab9[][2] = { +#if USE_PRIVATE_AREA [0x0000] = "\xa8\xbc", [0x0001] = "\xa8\xbf", [0x0020] = "\xa9\x89", [0x0021] = "\xa9\x8a", [0x0022] = "\xa9\x8b", [0x0023] = "\xa9\x8c", [0x0024] = "\xa9\x8d", [0x0025] = "\xa9\x8e", [0x0026] = "\xa9\x8f", @@ -12968,6 +12990,7 @@ static const char __gbk_from_ucs4_tab9[][2] = [0x0096] = "\xfe\x99", [0x0097] = "\xfe\x9a", [0x0098] = "\xfe\x9b", [0x0099] = "\xfe\x9c", [0x009a] = "\xfe\x9d", [0x009b] = "\xfe\x9e", [0x009c] = "\xfe\x9f", [0x009d] = "\xfe\xa0", +#endif }; /* The table can be created using @@ -13418,7 +13441,7 @@ 
static const char __gbk_from_ucs4_tab12[][2] = cp = __gbk_from_ucs4_tab8[ch - 0x4e00]; \ break; \ case 0xe7c7 ... 0xe864: \ - cp = __gbk_from_ucs4_tab9[ch - 0xe7c7]; \ + cp = USE_PRIVATE_AREA ? __gbk_from_ucs4_tab9[ch - 0xe7c7] : "\0\0"; \ break; \ case 0xf92c: \ cp = "\xfd\x9c"; \ diff --git a/iconvdata/testdata/GBK b/iconvdata/testdata/GBK index 31f3541..615c47c 100644 --- a/iconvdata/testdata/GBK +++ b/iconvdata/testdata/GBK @@ -438,7 +438,7 @@ - + A8BC A8BF @@ -446,8 +446,8 @@ P Q R S T U V W Y Z \ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ - - + A989 A98A A98B A98C A98D A98E A98F + A990 A991 A992 A993 A994 A995 @@ -1402,9 +1402,9 @@ @ A B C D E F G H I J K L M N O - P Q R S T U V W X Y Z [ \ ] ^ _ - ` a b c d e f g h i j k l m n o - p q r s t u v w x y z { | } ~ - - - + FE50 FE51 FE52 FE53 FE54 FE55 FE56 FE57 FE58 FE59 FE5A FE5B FE5C FE5D FE5E FE5F + FE60 FE61 FE62 FE63 FE64 FE65 FE66 FE67 FE68 FE69 FE6A FE6B FE6C FE6D FE6E FE6F + FE70 FE71 FE72 FE73 FE74 FE75 FE76 FE77 FE78 FE79 FE7A FE7B FE7C FE7D FE7E + FE80 FE81 FE82 FE83 FE84 FE85 FE86 FE87 FE88 FE89 FE8A FE8B FE8C FE8D FE8E FE8F + FE90 FE91 FE92 FE93 FE94 FE95 FE96 FE97 FE98 FE99 FE9A FE9B FE9C FE9D FE9E FE9F + FEA0 diff --git a/iconvdata/testdata/GBK..UTF8 b/iconvdata/testdata/GBK..UTF8 index cadf723..6d8ce8c 100644 --- a/iconvdata/testdata/GBK..UTF8 +++ b/iconvdata/testdata/GBK..UTF8 @@ -438,7 +438,7 @@ █ ▉ ▊ ▋ ▌ ▍ ▎ ▏ ▓ ▔ ▕ ▼ ▽ ◢ ◣ ◤ ◥ ☉ ⊕ 〒 〝 〞 ā á ǎ à ē é ě è ī í ǐ ì ō ó ǒ - ò ū ú ǔ ù ǖ ǘ ǚ ǜ ü ê ɑ ń ň + ò ū ú ǔ ù ǖ ǘ ǚ ǜ ü ê ɑ A8BC ń ň A8BF ɡ ㄅ ㄆ ㄇ ㄈ ㄉ ㄊ ㄋ ㄌ ㄍ ㄎ ㄏ ㄐ ㄑ ㄒ ㄓ ㄔ ㄕ ㄖ ㄗ ㄘ ㄙ ㄚ ㄛ ㄜ ㄝ ㄞ ㄟ ㄠ ㄡ ㄢ ㄣ ㄤ ㄥ ㄦ ㄧ ㄨ ㄩ @@ -446,8 +446,8 @@ ㏄ ㏎ ㏑ ㏒ ㏕ ︰ ¬ ¦ ℡ ㈱ ‐ ー ゛ ゜ ヽ ヾ 〆 ゝ ゞ ﹉ ﹊ ﹋ ﹌ ﹍ ﹎ ﹏ ﹐ ﹑ ﹒ ﹔ ﹕ ﹖ ﹗ ﹙ ﹚ ﹛ ﹜ ﹝ ﹞ ﹟ ﹠ ﹡ - ﹢ ﹣ ﹤ ﹥ ﹦ ﹨ ﹩ ﹪ ﹫ - 〇 + ﹢ ﹣ ﹤ ﹥ ﹦ ﹨ ﹩ ﹪ ﹫ A989 A98A A98B A98C A98D A98E A98F + A990 A991 A992 A993 A994 A995 〇 ─ ━ │ ┃ ┄ ┅ ┆ ┇ ┈ ┉ ┊ ┋ ┌ ┍ ┎ ┏ ┐ ┑ ┒ ┓ └ ┕ ┖ ┗ ┘ ┙ ┚ ┛ ├ ┝ ┞ ┟ ┠ ┡ ┢ ┣ ┤ ┥ ┦ ┧ ┨ ┩ ┪ ┫ @@ -1402,9 +1402,9 @@ 龕 龖 龗 龘 龜 龝 龞 龡 龢 龣 龤 龥 郎 凉 秊 裏 隣 兀 嗀 﨎 﨏 﨑 
﨓 﨔 礼 﨟 蘒 﨡 﨣 﨤 﨧 﨨 﨩 - - - - - - + FE50 FE51 FE52 FE53 FE54 FE55 FE56 FE57 FE58 FE59 FE5A FE5B FE5C FE5D FE5E FE5F + FE60 FE61 FE62 FE63 FE64 FE65 FE66 FE67 FE68 FE69 FE6A FE6B FE6C FE6D FE6E FE6F + FE70 FE71 FE72 FE73 FE74 FE75 FE76 FE77 FE78 FE79 FE7A FE7B FE7C FE7D FE7E + FE80 FE81 FE82 FE83 FE84 FE85 FE86 FE87 FE88 FE89 FE8A FE8B FE8C FE8D FE8E FE8F + FE90 FE91 FE92 FE93 FE94 FE95 FE96 FE97 FE98 FE99 FE9A FE9B FE9C FE9D FE9E FE9F + FEA0 diff --git a/iconvdata/tst-tables.sh b/iconvdata/tst-tables.sh index e743239..bc57e1b 100755 --- a/iconvdata/tst-tables.sh +++ b/iconvdata/tst-tables.sh @@ -191,7 +191,7 @@ cat <<EOF | BIG5HKSCS EUC-JP EUC-CN GB2312 - #GBK Converter uses private area characters + GBK EUC-TW GB18030 # |