aboutsummaryrefslogtreecommitdiff
path: root/iconvdata/euc-jisx0213.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2002-09-24 04:19:03 +0000
committer Ulrich Drepper <drepper@redhat.com> 2002-09-24 04:19:03 +0000
commit fa00744e514a99087f5fe70cac9334b29a04c93a (patch)
tree afb188699c7ba3d139c0c1e5962a749bec6480b7 /iconvdata/euc-jisx0213.c
parent f2a444335f8deabb58145db315b33a87e4f576da (diff)
download glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.zip
glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.tar.gz
glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.tar.bz2
Update.
* iconv/iconv_prog.c (main): Provide an error message that identifies
the wrong encoding.

2002-09-22 Bruno Haible <bruno@clisp.org>

* iconvdata/tscii.c: New file.
* iconvdata/testdata/TSCII: New file.
* iconvdata/testdata/TSCII..UTF8: New file.
* iconvdata/TSCII.precomposed: New file.
* iconvdata/TSCII.irreversible: New file.
* iconvdata/gconv-modules (TSCII): New module.
* iconvdata/Makefile (modules): Add TSCII.
(distribute): Add tscii.c.
* iconvdata/tst-table-from.c (try, utf8_decode, main): Double output
buffer size.
* iconvdata/tst-tables.sh: Add TSCII.
* iconvdata/TESTS: Add TSCII.

2002-09-22 Bruno Haible <bruno@clisp.org>

Revert 2002-04-18 patch.
* iconvdata/euc-jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
FROM_DIRECTION): Make the FROM direction stateful again.
* iconvdata/shift_jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
FROM_DIRECTION): Likewise.

2002-09-22 Bruno Haible <bruno@clisp.org>
Diffstat (limited to 'iconvdata/euc-jisx0213.c')
-rw-r--r-- iconvdata/euc-jisx0213.c 171
1 files changed, 96 insertions, 75 deletions
diff --git a/iconvdata/euc-jisx0213.c b/iconvdata/euc-jisx0213.c
index 8a41756..733cbc1 100644
--- a/iconvdata/euc-jisx0213.c
+++ b/iconvdata/euc-jisx0213.c
@@ -67,7 +67,9 @@
*statep = saved_state
-/* During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
+/* During EUC-JISX0213 to UCS-4 conversion, the COUNT element of the state
+ contains the last UCS-4 character, shifted by 3 bits.
+ During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
contains the last two bytes to be output, shifted by 3 bits. */
/* Since this is a stateful encoding we have to provide code which resets
@@ -77,8 +79,17 @@
if (data->__statep->__count != 0) \
{ \
if (FROM_DIRECTION) \
- /* We don't use shift states in the FROM_DIRECTION. */ \
- data->__statep->__count = 0; \
+ { \
+ if (__builtin_expect (outbuf + 4 <= outend, 1)) \
+ { \
+ /* Write out the last character. */ \
+ *((uint32_t *) outbuf)++ = data->__statep->__count >> 3; \
+ data->__statep->__count = 0; \
+ } \
+ else \
+ /* We don't have enough room in the output buffer. */ \
+ status = __GCONV_FULL_OUTPUT; \
+ } \
else \
{ \
if (__builtin_expect (outbuf + 2 <= outend, 1)) \
@@ -104,104 +115,114 @@
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint32_t ch = *inptr; \
+ uint32_t ch; \
\
- if (ch < 0x80) \
- /* Plain ASCII character. */ \
- ++inptr; \
- else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \
+ /* Determine whether there is a buffered character pending. */ \
+ ch = *statep >> 3; \
+ if (__builtin_expect (ch == 0, 1)) \
{ \
- /* Two or three byte character. */ \
- uint32_t ch2; \
+ /* No - so look at the next input byte. */ \
+ ch = *inptr; \
\
- if (__builtin_expect (inptr + 1 >= inend, 0)) \
+ if (ch < 0x80) \
+ /* Plain ASCII character. */ \
+ ++inptr; \
+ else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \
{ \
- /* The second byte is not available. */ \
- result = __GCONV_INCOMPLETE_INPUT; \
- break; \
- } \
+ /* Two or three byte character. */ \
+ uint32_t ch2; \
\
- ch2 = inptr[1]; \
+ if (__builtin_expect (inptr + 1 >= inend, 0)) \
+ { \
+ /* The second byte is not available. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
\
- /* The second byte must be >= 0xa1 and <= 0xfe. */ \
- if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \
- { \
- /* This is an illegal character. */ \
- STANDARD_FROM_LOOP_ERR_HANDLER (1); \
- } \
+ ch2 = inptr[1]; \
\
- if (ch == 0x8e) \
- { \
- /* Half-width katakana. */ \
- if (__builtin_expect (ch2 > 0xdf, 0)) \
- STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+ /* The second byte must be >= 0xa1 and <= 0xfe. */ \
+ if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \
+ { \
+ /* This is an illegal character. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+ } \
\
- ch = ch2 + 0xfec0; \
- inptr += 2; \
- } \
- else \
- { \
- const unsigned char *endp; \
+ if (ch == 0x8e) \
+ { \
+ /* Half-width katakana. */ \
+ if (__builtin_expect (ch2 > 0xdf, 0)) \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
\
- if (ch == 0x8f) \
+ ch = ch2 + 0xfec0; \
+ inptr += 2; \
+ } \
+ else \
{ \
- /* JISX 0213 plane 2. */ \
- uint32_t ch3; \
+ const unsigned char *endp; \
\
- if (__builtin_expect (inptr + 2 >= inend, 0)) \
+ if (ch == 0x8f) \
{ \
- /* The third byte is not available. */ \
- result = __GCONV_INCOMPLETE_INPUT; \
- break; \
- } \
+ /* JISX 0213 plane 2. */ \
+ uint32_t ch3; \
\
- ch3 = inptr[2]; \
- endp = inptr + 3; \
+ if (__builtin_expect (inptr + 2 >= inend, 0)) \
+ { \
+ /* The third byte is not available. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
\
- ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \
- } \
- else \
- { \
- /* JISX 0213 plane 1. */ \
- endp = inptr + 2; \
+ ch3 = inptr[2]; \
+ endp = inptr + 3; \
\
- ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \
- } \
+ ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \
+ } \
+ else \
+ { \
+ /* JISX 0213 plane 1. */ \
+ endp = inptr + 2; \
\
- if (ch == 0) \
- /* This is an illegal character. */ \
- STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+ ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \
+ } \
\
- if (ch < 0x80) \
- { \
- /* It's a combining character. */ \
- uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
- uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
+ if (ch == 0) \
+ /* This is an illegal character. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
\
- /* See whether we have room for two characters. */ \
- if (outptr + 8 <= outend) \
+ inptr = endp; \
+ \
+ if (ch < 0x80) \
{ \
- inptr = endp; \
+ /* It's a combining character. */ \
+ uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
+ uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
+ \
put32 (outptr, u1); \
outptr += 4; \
- put32 (outptr, u2); \
- outptr += 4; \
- continue; \
- } \
- else \
- { \
+ \
+ /* See whether we have room for the second character. */ \
+ if (outptr + 4 <= outend) \
+ { \
+ put32 (outptr, u2); \
+ outptr += 4; \
+ continue; \
+ } \
+ \
+ /* Otherwise store only the first character now, and \
+ put the second one into the queue. */ \
+ *statep = u2 << 3; \
+ /* Tell the caller why we terminate the loop. */ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
- \
- inptr = endp; \
} \
- } \
- else \
- { \
- /* This is illegal. */ \
- STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+ else \
+ { \
+ /* This is illegal. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+ } \
} \
\
put32 (outptr, ch); \