aboutsummaryrefslogtreecommitdiff
path: root/iconvdata
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 2000-09-26 00:05:52 +0000
committer: Ulrich Drepper <drepper@redhat.com> 2000-09-26 00:05:52 +0000
commit: 0b95971d92a07bdc4719f41fbbb23cfc8decb2f7 (patch)
tree: bcea861005746afbc240e24884a73ebd4d51792e /iconvdata
parent: a2aa7df3d6bf73cda977ee050a503a7f7a78a82d (diff)
download: glibc-0b95971d92a07bdc4719f41fbbb23cfc8decb2f7.zip
glibc-0b95971d92a07bdc4719f41fbbb23cfc8decb2f7.tar.gz
glibc-0b95971d92a07bdc4719f41fbbb23cfc8decb2f7.tar.bz2
Update.
2000-09-23  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/gbk.c (USE_PRIVATE_AREA): Define to 0.
	(__gbk_to_ucs): Conditionalize private area mappings.
	(__gbk_from_ucs4_tab9): Likewise.
	(BODY for TO_LOOP): Likewise.
	* iconvdata/testdata/GBK: Don't use characters not yet in Unicode.
	* iconvdata/testdata/GBK..UTF8: Likewise.
	* iconvdata/tst-tables.sh: Enable GBK test.

2000-09-23  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/gbk.c (__gbk_to_ucs): Swap U+2014 and U+2015.
	(__gbk_from_ucs4_tab4): Swap entries for U+2014 and U+2015.
	(BODY for FROM_LOOP): Reject input > 0xFEA0, avoids out-of-bounds
	array access.
	* iconvdata/gbgbk.c (BODY for FROM_LOOP): Map 0xA844 to 0xA1AA.
	* iconvdata/testdata/GBK..UTF8: Swap U+2014 and U+2015.

2000-09-23  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/johab.c (final_to_ucs): Fix typos.
	(jamo_from_ucs_table): Likewise.
	(BODY for FROM_LOOP): Map 0x5c to U+20A9.  Reject ranges
	0xD9E6..0xD9FE and 0xDEF2..0xDEFE.
	(BODY for TO_LOOP): Map U+20A9 to 0x5c.  Don't produce values in
	the range 0xD9E6..0xD9FE.
	* iconvdata/tst-tables.sh: Enable JOHAB testing.
Diffstat (limited to 'iconvdata')
-rw-r--r--  iconvdata/gbk.c                | 33
-rw-r--r--  iconvdata/testdata/GBK         | 18
-rw-r--r--  iconvdata/testdata/GBK..UTF8   | 18
-rwxr-xr-x  iconvdata/tst-tables.sh        |  2
4 files changed, 47 insertions, 24 deletions
diff --git a/iconvdata/gbk.c b/iconvdata/gbk.c
index f9a53ff..b07f392 100644
--- a/iconvdata/gbk.c
+++ b/iconvdata/gbk.c
@@ -26,6 +26,12 @@
#include <wchar.h>
#include <assert.h>
+/* Unicode 3.0.1 does not contain all the characters in GBK. Define
+ USE_PRIVATE_AREA to 1 in order to use mappings from/to the Unicode
+ Private Use area. Until we see other systems using the same mappings,
+ it is disabled. */
+#define USE_PRIVATE_AREA 0
+
/* The conversion table to UCS4 has almost no holes. It can be generated with:
perl tab.pl < gbk.txt
@@ -1739,7 +1745,13 @@ static const uint16_t __gbk_to_ucs[] =
[0x1db0] = 0x00f2, [0x1db1] = 0x016b, [0x1db2] = 0x00fa, [0x1db3] = 0x01d4,
[0x1db4] = 0x00f9, [0x1db5] = 0x01d6, [0x1db6] = 0x01d8, [0x1db7] = 0x01da,
[0x1db8] = 0x01dc, [0x1db9] = 0x00fc, [0x1dba] = 0x00ea, [0x1dbb] = 0x0251,
- [0x1dbc] = 0xe7c7, [0x1dbd] = 0x0144, [0x1dbe] = 0x0148, [0x1dbf] = 0xe7c8,
+#if USE_PRIVATE_AREA
+ [0x1dbc] = 0xe7c7,
+#endif
+ [0x1dbd] = 0x0144, [0x1dbe] = 0x0148,
+#if USE_PRIVATE_AREA
+ [0x1dbf] = 0xe7c8,
+#endif
[0x1dc0] = 0x0261, [0x1dc5] = 0x3105, [0x1dc6] = 0x3106, [0x1dc7] = 0x3107,
[0x1dc8] = 0x3108, [0x1dc9] = 0x3109, [0x1dca] = 0x310a, [0x1dcb] = 0x310b,
[0x1dcc] = 0x310c, [0x1dcd] = 0x310d, [0x1dce] = 0x310e, [0x1dcf] = 0x310f,
@@ -1766,10 +1778,14 @@ static const uint16_t __gbk_to_ucs[] =
[0x1e3b] = 0xfe5e, [0x1e3c] = 0xfe5f, [0x1e3d] = 0xfe60, [0x1e3e] = 0xfe61,
[0x1e40] = 0xfe62, [0x1e41] = 0xfe63, [0x1e42] = 0xfe64, [0x1e43] = 0xfe65,
[0x1e44] = 0xfe66, [0x1e45] = 0xfe68, [0x1e46] = 0xfe69, [0x1e47] = 0xfe6a,
- [0x1e48] = 0xfe6b, [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9,
+ [0x1e48] = 0xfe6b,
+#if USE_PRIVATE_AREA
+ [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9,
[0x1e4c] = 0xe7ea, [0x1e4d] = 0xe7eb, [0x1e4e] = 0xe7ec, [0x1e4f] = 0xe7ed,
[0x1e50] = 0xe7ee, [0x1e51] = 0xe7ef, [0x1e52] = 0xe7f0, [0x1e53] = 0xe7f1,
- [0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3, [0x1e56] = 0x3007, [0x1e64] = 0x2500,
+ [0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3,
+#endif
+ [0x1e56] = 0x3007, [0x1e64] = 0x2500,
[0x1e65] = 0x2501, [0x1e66] = 0x2502, [0x1e67] = 0x2503, [0x1e68] = 0x2504,
[0x1e69] = 0x2505, [0x1e6a] = 0x2506, [0x1e6b] = 0x2507, [0x1e6c] = 0x2508,
[0x1e6d] = 0x2509, [0x1e6e] = 0x250a, [0x1e6f] = 0x250b, [0x1e70] = 0x250c,
@@ -5499,7 +5515,9 @@ static const uint16_t __gbk_to_ucs[] =
[0x5dc2] = 0xfa0e, [0x5dc3] = 0xfa0f, [0x5dc4] = 0xfa11, [0x5dc5] = 0xfa13,
[0x5dc6] = 0xfa14, [0x5dc7] = 0xfa18, [0x5dc8] = 0xfa1f, [0x5dc9] = 0xfa20,
[0x5dca] = 0xfa21, [0x5dcb] = 0xfa23, [0x5dcc] = 0xfa24, [0x5dcd] = 0xfa27,
- [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, [0x5dd0] = 0xe815, [0x5dd1] = 0xe816,
+ [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29,
+#if USE_PRIVATE_AREA
+ [0x5dd0] = 0xe815, [0x5dd1] = 0xe816,
[0x5dd2] = 0xe817, [0x5dd3] = 0xe818, [0x5dd4] = 0xe819, [0x5dd5] = 0xe81a,
[0x5dd6] = 0xe81b, [0x5dd7] = 0xe81c, [0x5dd8] = 0xe81d, [0x5dd9] = 0xe81e,
[0x5dda] = 0xe81f, [0x5ddb] = 0xe820, [0x5ddc] = 0xe821, [0x5ddd] = 0xe822,
@@ -5520,6 +5538,9 @@ static const uint16_t __gbk_to_ucs[] =
[0x5e17] = 0xe85b, [0x5e18] = 0xe85c, [0x5e19] = 0xe85d, [0x5e1a] = 0xe85e,
[0x5e1b] = 0xe85f, [0x5e1c] = 0xe860, [0x5e1d] = 0xe861, [0x5e1e] = 0xe862,
[0x5e1f] = 0xe863, [0x5e20] = 0xe864,
+#else
+ [0x5e20] = 0x0000,
+#endif
};
/* The table can be created using
@@ -12936,6 +12957,7 @@ static const char __gbk_from_ucs4_tab8[][2] =
*/
static const char __gbk_from_ucs4_tab9[][2] =
{
+#if USE_PRIVATE_AREA
[0x0000] = "\xa8\xbc", [0x0001] = "\xa8\xbf", [0x0020] = "\xa9\x89",
[0x0021] = "\xa9\x8a", [0x0022] = "\xa9\x8b", [0x0023] = "\xa9\x8c",
[0x0024] = "\xa9\x8d", [0x0025] = "\xa9\x8e", [0x0026] = "\xa9\x8f",
@@ -12968,6 +12990,7 @@ static const char __gbk_from_ucs4_tab9[][2] =
[0x0096] = "\xfe\x99", [0x0097] = "\xfe\x9a", [0x0098] = "\xfe\x9b",
[0x0099] = "\xfe\x9c", [0x009a] = "\xfe\x9d", [0x009b] = "\xfe\x9e",
[0x009c] = "\xfe\x9f", [0x009d] = "\xfe\xa0",
+#endif
};
/* The table can be created using
@@ -13418,7 +13441,7 @@ static const char __gbk_from_ucs4_tab12[][2] =
cp = __gbk_from_ucs4_tab8[ch - 0x4e00]; \
break; \
case 0xe7c7 ... 0xe864: \
- cp = __gbk_from_ucs4_tab9[ch - 0xe7c7]; \
+ cp = USE_PRIVATE_AREA ? __gbk_from_ucs4_tab9[ch - 0xe7c7] : "\0\0"; \
break; \
case 0xf92c: \
cp = "\xfd\x9c"; \
diff --git a/iconvdata/testdata/GBK b/iconvdata/testdata/GBK
index 31f3541..615c47c 100644
--- a/iconvdata/testdata/GBK
+++ b/iconvdata/testdata/GBK
@@ -438,7 +438,7 @@
-
+ A8BC A8BF
@@ -446,8 +446,8 @@
P Q R S T U V W Y Z \
` a b c d e f g h i j k l m n o
p q r s t u v w x y z { | } ~
-
-
+ A989 A98A A98B A98C A98D A98E A98F
+ A990 A991 A992 A993 A994 A995
@@ -1402,9 +1402,9 @@
@ A B C D E F G H I J K L M N O
- P Q R S T U V W X Y Z [ \ ] ^ _
- ` a b c d e f g h i j k l m n o
- p q r s t u v w x y z { | } ~
-
-
-
+ FE50 FE51 FE52 FE53 FE54 FE55 FE56 FE57 FE58 FE59 FE5A FE5B FE5C FE5D FE5E FE5F
+ FE60 FE61 FE62 FE63 FE64 FE65 FE66 FE67 FE68 FE69 FE6A FE6B FE6C FE6D FE6E FE6F
+ FE70 FE71 FE72 FE73 FE74 FE75 FE76 FE77 FE78 FE79 FE7A FE7B FE7C FE7D FE7E
+ FE80 FE81 FE82 FE83 FE84 FE85 FE86 FE87 FE88 FE89 FE8A FE8B FE8C FE8D FE8E FE8F
+ FE90 FE91 FE92 FE93 FE94 FE95 FE96 FE97 FE98 FE99 FE9A FE9B FE9C FE9D FE9E FE9F
+ FEA0
diff --git a/iconvdata/testdata/GBK..UTF8 b/iconvdata/testdata/GBK..UTF8
index cadf723..6d8ce8c 100644
--- a/iconvdata/testdata/GBK..UTF8
+++ b/iconvdata/testdata/GBK..UTF8
@@ -438,7 +438,7 @@
█ ▉ ▊ ▋ ▌ ▍ ▎ ▏ ▓ ▔ ▕ ▼ ▽ ◢ ◣ ◤
◥ ☉ ⊕ 〒 〝 〞
ā á ǎ à ē é ě è ī í ǐ ì ō ó ǒ
- ò ū ú ǔ ù ǖ ǘ ǚ ǜ ü ê ɑ  ń ň 
+ ò ū ú ǔ ù ǖ ǘ ǚ ǜ ü ê ɑ A8BC ń ň A8BF
ɡ ㄅ ㄆ ㄇ ㄈ ㄉ ㄊ ㄋ ㄌ ㄍ ㄎ ㄏ
ㄐ ㄑ ㄒ ㄓ ㄔ ㄕ ㄖ ㄗ ㄘ ㄙ ㄚ ㄛ ㄜ ㄝ ㄞ ㄟ
ㄠ ㄡ ㄢ ㄣ ㄤ ㄥ ㄦ ㄧ ㄨ ㄩ
@@ -446,8 +446,8 @@
㏄ ㏎ ㏑ ㏒ ㏕ ︰ ¬ ¦ ℡ ㈱ ‐
ー ゛ ゜ ヽ ヾ 〆 ゝ ゞ ﹉ ﹊ ﹋ ﹌ ﹍ ﹎ ﹏ ﹐
﹑ ﹒ ﹔ ﹕ ﹖ ﹗ ﹙ ﹚ ﹛ ﹜ ﹝ ﹞ ﹟ ﹠ ﹡
- ﹢ ﹣ ﹤ ﹥ ﹦ ﹨ ﹩ ﹪ ﹫       
-       〇
+ ﹢ ﹣ ﹤ ﹥ ﹦ ﹨ ﹩ ﹪ ﹫ A989 A98A A98B A98C A98D A98E A98F
+ A990 A991 A992 A993 A994 A995 〇
─ ━ │ ┃ ┄ ┅ ┆ ┇ ┈ ┉ ┊ ┋
┌ ┍ ┎ ┏ ┐ ┑ ┒ ┓ └ ┕ ┖ ┗ ┘ ┙ ┚ ┛
├ ┝ ┞ ┟ ┠ ┡ ┢ ┣ ┤ ┥ ┦ ┧ ┨ ┩ ┪ ┫
@@ -1402,9 +1402,9 @@
龕 龖 龗 龘 龜 龝 龞 龡 龢 龣 龤 龥 郎 凉 秊 裏
兀 嗀 﨎 﨏 﨑 﨓 﨔 礼 﨟 蘒 﨡 﨣 﨤 﨧 﨨 﨩
-                
-                
-               
-                
-                
- 
+ FE50 FE51 FE52 FE53 FE54 FE55 FE56 FE57 FE58 FE59 FE5A FE5B FE5C FE5D FE5E FE5F
+ FE60 FE61 FE62 FE63 FE64 FE65 FE66 FE67 FE68 FE69 FE6A FE6B FE6C FE6D FE6E FE6F
+ FE70 FE71 FE72 FE73 FE74 FE75 FE76 FE77 FE78 FE79 FE7A FE7B FE7C FE7D FE7E
+ FE80 FE81 FE82 FE83 FE84 FE85 FE86 FE87 FE88 FE89 FE8A FE8B FE8C FE8D FE8E FE8F
+ FE90 FE91 FE92 FE93 FE94 FE95 FE96 FE97 FE98 FE99 FE9A FE9B FE9C FE9D FE9E FE9F
+ FEA0
diff --git a/iconvdata/tst-tables.sh b/iconvdata/tst-tables.sh
index e743239..bc57e1b 100755
--- a/iconvdata/tst-tables.sh
+++ b/iconvdata/tst-tables.sh
@@ -191,7 +191,7 @@ cat <<EOF |
BIG5HKSCS
EUC-JP
EUC-CN GB2312
- #GBK Converter uses private area characters
+ GBK
EUC-TW
GB18030
#