about summary refs log tree commit diff
path: root/iconvdata
diff options
context:
space:
mode:
Diffstat (limited to 'iconvdata')
-rw-r--r-- iconvdata/SHIFT_JISX0213.irreversible 2
-rw-r--r-- iconvdata/cns11643l1.c 6
-rw-r--r-- iconvdata/euc-jisx0213.c 204
-rw-r--r-- iconvdata/shift_jisx0213.c 224
4 files changed, 201 insertions, 235 deletions
diff --git a/iconvdata/SHIFT_JISX0213.irreversible b/iconvdata/SHIFT_JISX0213.irreversible
new file mode 100644
index 0000000..cf99b72
--- /dev/null
+++ b/iconvdata/SHIFT_JISX0213.irreversible
@@ -0,0 +1,2 @@
+0x5C 0x005C
+0x7E 0x007E
diff --git a/iconvdata/cns11643l1.c b/iconvdata/cns11643l1.c
index 7e179e3..1ea1297 100644
--- a/iconvdata/cns11643l1.c
+++ b/iconvdata/cns11643l1.c
@@ -1,5 +1,5 @@
/* Mapping tables for CNS 11643, plane 1 handling.
- Copyright (C) 1998, 2000, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
@@ -20,9 +20,9 @@
#include <stdint.h>
-/* To generate a Unicode 3.1 CNS11643.TXT, take
+/* To generate a Unicode 3.2 CNS11643.TXT, take
http://www.unicode.org/Public/Mappings/EASTASIA/OTHER/CNS11643.TXT
- and add the following lines (see Unicode 3.1 UNIHAN.TXT):
+ and add the following lines (see Unicode 3.2 UNIHAN.TXT):
0x12728 0x4EA0 # <CJK Ideograph>
0x1272F 0x51AB # <CJK Ideograph>
0x12734 0x52F9 # <CJK Ideograph>
diff --git a/iconvdata/euc-jisx0213.c b/iconvdata/euc-jisx0213.c
index b0836b1..c696f94 100644
--- a/iconvdata/euc-jisx0213.c
+++ b/iconvdata/euc-jisx0213.c
@@ -62,9 +62,7 @@
*statep = saved_state
-/* During EUC-JISX0213 to UCS-4 conversion, the COUNT element of the state
- contains the last UCS-4 character, shifted by 3 bits.
- During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
+/* During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
contains the last two bytes to be output, shifted by 3 bits. */
/* Since this is a stateful encoding we have to provide code which resets
@@ -74,17 +72,8 @@
if (data->__statep->__count != 0) \
{ \
if (FROM_DIRECTION) \
- { \
- if (__builtin_expect (outbuf + 4 <= outend, 1)) \
- { \
- /* Write out the last character. */ \
- *((uint32_t *) outbuf)++ = data->__statep->__count >> 3; \
- data->__statep->__count = 0; \
- } \
- else \
- /* We don't have enough room in the output buffer. */ \
- status = __GCONV_FULL_OUTPUT; \
- } \
+ /* We don't use shift states in the FROM_DIRECTION. */ \
+ data->__statep->__count = 0; \
else \
{ \
if (__builtin_expect (outbuf + 2 <= outend, 1)) \
@@ -109,33 +98,44 @@
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint32_t ch; \
+ uint32_t ch = *inptr; \
\
- /* Determine whether there is a buffered character pending. */ \
- ch = *statep >> 3; \
- if (__builtin_expect (ch == 0, 1)) \
+ if (ch < 0x80) \
+ /* Plain ASCII character. */ \
+ ++inptr; \
+ else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \
{ \
- /* No - so look at the next input byte. */ \
- ch = *inptr; \
- if (ch < 0x80) \
- /* Plain ASCII character. */ \
- ++inptr; \
- else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \
+ /* Two or three byte character. */ \
+ uint32_t ch2; \
+ \
+ if (__builtin_expect (inptr + 1 >= inend, 0)) \
{ \
- /* Two or three byte character. */ \
- uint32_t ch2; \
+ /* The second byte is not available. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ \
+ ch2 = inptr[1]; \
\
- if (__builtin_expect (inptr + 1 >= inend, 0)) \
+ /* The second byte must be >= 0xa1 and <= 0xfe. */ \
+ if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \
+ { \
+ /* This is an illegal character. */ \
+ if (! ignore_errors_p ()) \
{ \
- /* The second byte is not available. */ \
- result = __GCONV_INCOMPLETE_INPUT; \
+ result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
\
- ch2 = inptr[1]; \
+ ++inptr; \
+ ++*irreversible; \
+ break; \
+ } \
\
- /* The second byte must be >= 0xa1 and <= 0xfe. */ \
- if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \
+ if (ch == 0x8e) \
+ { \
+ /* Half-width katakana. */ \
+ if (__builtin_expect (ch2 > 0xdf, 0)) \
{ \
/* This is an illegal character. */ \
if (! ignore_errors_p ()) \
@@ -149,107 +149,89 @@
break; \
} \
\
- if (ch == 0x8e) \
+ ch = ch2 + 0xfec0; \
+ inptr += 2; \
+ } \
+ else \
+ { \
+ const unsigned char *endp; \
+ \
+ if (ch == 0x8f) \
{ \
- /* Half-width katakana. */ \
- if (__builtin_expect (ch2 > 0xdf, 0)) \
+ /* JISX 0213 plane 2. */ \
+ uint32_t ch3; \
+ \
+ if (__builtin_expect (inptr + 2 >= inend, 0)) \
{ \
- /* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
- { \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
+ /* The third byte is not available. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
- ch = ch2 + 0xfec0; \
- inptr += 2; \
+ ch3 = inptr[2]; \
+ endp = inptr + 3; \
+ \
+ ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \
} \
else \
{ \
- const unsigned char *endp; \
+ /* JISX 0213 plane 1. */ \
+ endp = inptr + 2; \
\
- if (ch == 0x8f) \
- { \
- /* JISX 0213 plane 2. */ \
- uint32_t ch3; \
+ ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \
+ } \
\
- if (__builtin_expect (inptr + 2 >= inend, 0)) \
- { \
- /* The third byte is not available. */ \
- result = __GCONV_INCOMPLETE_INPUT; \
- break; \
- } \
+ if (ch == 0) \
+ { \
+ /* This is an illegal character. */ \
+ if (! ignore_errors_p ()) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
\
- ch3 = inptr[2]; \
- endp = inptr + 3; \
+ ++inptr; \
+ ++*irreversible; \
+ break; \
+ } \
\
- ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \
- } \
- else \
- { \
- /* JISX 0213 plane 1. */ \
- endp = inptr + 2; \
+ inptr = endp; \
\
- ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \
- } \
+ if (ch < 0x80) \
+ { \
+ /* It's a combining character. */ \
+ uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
+ uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
\
- if (ch == 0) \
+ /* See whether we have room for two characters. */ \
+ if (outptr + 8 <= outend) \
{ \
- /* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
- { \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
- break; \
+ put32 (outptr, u1); \
+ outptr += 4; \
+ put32 (outptr, u2); \
+ outptr += 4; \
+ continue; \
} \
- \
- inptr = endp; \
- \
- if (ch < 0x80) \
+ else \
{ \
- /* It's a combining character. */ \
- uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
- uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
- \
- /* See whether we have room for two characters. */ \
- if (outptr + 8 <= outend) \
- { \
- put32 (outptr, u1); \
- outptr += 4; \
- put32 (outptr, u2); \
- outptr += 4; \
- continue; \
- } \
- \
- /* Otherwise store only the first character now, and \
- put the second one into the queue. */ \
- ch = u1; \
- *statep = u2 << 3; \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
} \
} \
} \
- else \
+ } \
+ else \
+ { \
+ /* This is illegal. */ \
+ if (! ignore_errors_p ()) \
{ \
- /* This is illegal. */ \
- if (! ignore_errors_p ()) \
- { \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
- continue; \
+ result = __GCONV_ILLEGAL_INPUT; \
+ break; \
} \
+ \
+ ++inptr; \
+ ++*irreversible; \
+ continue; \
} \
\
put32 (outptr, ch); \
diff --git a/iconvdata/shift_jisx0213.c b/iconvdata/shift_jisx0213.c
index 81fef70..0c1ca72 100644
--- a/iconvdata/shift_jisx0213.c
+++ b/iconvdata/shift_jisx0213.c
@@ -62,9 +62,7 @@
*statep = saved_state
-/* During Shift_JISX0213 to UCS-4 conversion, the COUNT element of the state
- contains the last UCS-4 character, shifted by 3 bits.
- During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
+/* During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
contains the last two bytes to be output, shifted by 3 bits. */
/* Since this is a stateful encoding we have to provide code which resets
@@ -74,17 +72,8 @@
if (data->__statep->__count != 0) \
{ \
if (FROM_DIRECTION) \
- { \
- if (__builtin_expect (outbuf + 4 <= outend, 1)) \
- { \
- /* Write out the last character. */ \
- *((uint32_t *) outbuf)++ = data->__statep->__count >> 3; \
- data->__statep->__count = 0; \
- } \
- else \
- /* We don't have enough room in the output buffer. */ \
- status = __GCONV_FULL_OUTPUT; \
- } \
+ /* We don't use shift states in the FROM_DIRECTION. */ \
+ data->__statep->__count = 0; \
else \
{ \
if (__builtin_expect (outbuf + 2 <= outend, 1)) \
@@ -109,137 +98,130 @@
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint32_t ch; \
+ uint32_t ch = *inptr; \
\
- /* Determine whether there is a buffered character pending. */ \
- ch = *statep >> 3; \
- if (__builtin_expect (ch == 0, 1)) \
+ if (ch < 0x80) \
{ \
- /* No - so look at the next input byte. */ \
- ch = *inptr; \
- if (ch < 0x80) \
- { \
- /* Plain ISO646-JP character. */ \
- if (__builtin_expect (ch == 0x5c, 0)) \
- ch = 0xa5; \
- else if (__builtin_expect (ch == 0x7e, 0)) \
- ch = 0x203e; \
- ++inptr; \
- } \
- else if (ch >= 0xa1 && ch <= 0xdf) \
+ /* Plain ISO646-JP character. */ \
+ if (__builtin_expect (ch == 0x5c, 0)) \
+ ch = 0xa5; \
+ else if (__builtin_expect (ch == 0x7e, 0)) \
+ ch = 0x203e; \
+ ++inptr; \
+ } \
+ else if (ch >= 0xa1 && ch <= 0xdf) \
+ { \
+ /* Half-width katakana. */ \
+ ch += 0xfec0; \
+ ++inptr; \
+ } \
+ else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc)) \
+ { \
+ /* Two byte character. */ \
+ uint32_t ch2; \
+ \
+ if (__builtin_expect (inptr + 1 >= inend, 0)) \
{ \
- /* Half-width katakana. */ \
- ch += 0xfec0; \
- ++inptr; \
+ /* The second byte is not available. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
} \
- else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc)) \
- { \
- /* Two byte character. */ \
- uint32_t ch2; \
- \
- if (__builtin_expect (inptr + 1 >= inend, 0)) \
- { \
- /* The second byte is not available. */ \
- result = __GCONV_INCOMPLETE_INPUT; \
- break; \
- } \
\
- ch2 = inptr[1]; \
+ ch2 = inptr[1]; \
\
- /* The second byte must be in the range 0x{40..7E,80..FC}. */ \
- if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0))\
+ /* The second byte must be in the range 0x{40..7E,80..FC}. */ \
+ if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0)) \
+ { \
+ /* This is an illegal character. */ \
+ if (! ignore_errors_p ()) \
{ \
- /* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
- { \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
+ result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
\
- /* Convert to row and column. */ \
- if (ch < 0xe0) \
- ch -= 0x81; \
- else \
- ch -= 0xc1; \
- if (ch2 < 0x80) \
- ch2 -= 0x40; \
- else \
- ch2 -= 0x41; \
- /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb. */ \
- ch = 2 * ch; \
- if (ch2 >= 0x5e) \
- ch2 -= 0x5e, ch++; \
- ch2 += 0x21; \
- if (ch >= 0x5e) \
- { \
- /* Handling of JISX 0213 plane 2 rows. */ \
- if (ch >= 0x67) \
- ch += 230; \
- else if (ch >= 0x63 || ch == 0x5f) \
- ch += 168; \
- else \
- ch += 162; \
- } \
+ ++inptr; \
+ ++*irreversible; \
+ break; \
+ } \
\
- ch = jisx0213_to_ucs4 (0x121 + ch, ch2); \
+ /* Convert to row and column. */ \
+ if (ch < 0xe0) \
+ ch -= 0x81; \
+ else \
+ ch -= 0xc1; \
+ if (ch2 < 0x80) \
+ ch2 -= 0x40; \
+ else \
+ ch2 -= 0x41; \
+ /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb. */ \
+ ch = 2 * ch; \
+ if (ch2 >= 0x5e) \
+ ch2 -= 0x5e, ch++; \
+ ch2 += 0x21; \
+ if (ch >= 0x5e) \
+ { \
+ /* Handling of JISX 0213 plane 2 rows. */ \
+ if (ch >= 0x67) \
+ ch += 230; \
+ else if (ch >= 0x63 || ch == 0x5f) \
+ ch += 168; \
+ else \
+ ch += 162; \
+ } \
\
- if (ch == 0) \
- { \
- /* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
- { \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
+ ch = jisx0213_to_ucs4 (0x121 + ch, ch2); \
\
- ++inptr; \
- ++*irreversible; \
+ if (ch == 0) \
+ { \
+ /* This is an illegal character. */ \
+ if (! ignore_errors_p ()) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
break; \
- } \
+ } \
\
- inptr += 2; \
+ ++inptr; \
+ ++*irreversible; \
+ break; \
+ } \
\
- if (ch < 0x80) \
- { \
- /* It's a combining character. */ \
- uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
- uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
+ inptr += 2; \
\
- /* See whether we have room for two characters. */ \
- if (outptr + 8 <= outend) \
- { \
- put32 (outptr, u1); \
- outptr += 4; \
- put32 (outptr, u2); \
- outptr += 4; \
- continue; \
- } \
+ if (ch < 0x80) \
+ { \
+ /* It's a combining character. */ \
+ uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
+ uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
\
- /* Otherwise store only the first character now, and \
- put the second one into the queue. */ \
- ch = u1; \
- *statep = u2 << 3; \
+ /* See whether we have room for two characters. */ \
+ if (outptr + 8 <= outend) \
+ { \
+ put32 (outptr, u1); \
+ outptr += 4; \
+ put32 (outptr, u2); \
+ outptr += 4; \
+ continue; \
} \
- } \
- else \
- { \
- /* This is illegal. */ \
- if (! ignore_errors_p ()) \
+ else \
{ \
- result = __GCONV_ILLEGAL_INPUT; \
+ result = __GCONV_FULL_OUTPUT; \
break; \
} \
- \
- ++inptr; \
- ++*irreversible; \
- continue; \
} \
} \
+ else \
+ { \
+ /* This is illegal. */ \
+ if (! ignore_errors_p ()) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ \
+ ++inptr; \
+ ++*irreversible; \
+ continue; \
+ } \
\
put32 (outptr, ch); \
outptr += 4; \