diff options
author | Ulrich Drepper <drepper@redhat.com> | 2002-09-24 04:19:03 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2002-09-24 04:19:03 +0000 |
commit | fa00744e514a99087f5fe70cac9334b29a04c93a (patch) | |
tree | afb188699c7ba3d139c0c1e5962a749bec6480b7 /iconvdata/euc-jisx0213.c | |
parent | f2a444335f8deabb58145db315b33a87e4f576da (diff) | |
download | glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.zip glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.tar.gz glibc-fa00744e514a99087f5fe70cac9334b29a04c93a.tar.bz2 |
Update.
* iconv/iconv_prog.c (main): Provide an error message that identifies
the wrong encoding.
2002-09-22 Bruno Haible <bruno@clisp.org>
* iconvdata/tscii.c: New file.
* iconvdata/testdata/TSCII: New file.
* iconvdata/testdata/TSCII..UTF8: New file.
* iconvdata/TSCII.precomposed: New file.
* iconvdata/TSCII.irreversible: New file.
* iconvdata/gconv-modules (TSCII): New module.
* iconvdata/Makefile (modules): Add TSCII.
(distribute): Add tscii.c.
* iconvdata/tst-table-from.c (try, utf8_decode, main): Double output
buffer size.
* iconvdata/tst-tables.sh: Add TSCII.
* iconvdata/TESTS: Add TSCII.
2002-09-22 Bruno Haible <bruno@clisp.org>
Revert 2002-04-18 patch.
* iconvdata/euc-jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
FROM_DIRECTION): Make the FROM direction stateful again.
* iconvdata/shift_jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
FROM_DIRECTION): Likewise.
2002-09-22 Bruno Haible <bruno@clisp.org>
Diffstat (limited to 'iconvdata/euc-jisx0213.c')
-rw-r--r-- | iconvdata/euc-jisx0213.c | 171 |
1 files changed, 96 insertions, 75 deletions
diff --git a/iconvdata/euc-jisx0213.c b/iconvdata/euc-jisx0213.c index 8a41756..733cbc1 100644 --- a/iconvdata/euc-jisx0213.c +++ b/iconvdata/euc-jisx0213.c @@ -67,7 +67,9 @@ *statep = saved_state -/* During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state +/* During EUC-JISX0213 to UCS-4 conversion, the COUNT element of the state + contains the last UCS-4 character, shifted by 3 bits. + During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state contains the last two bytes to be output, shifted by 3 bits. */ /* Since this is a stateful encoding we have to provide code which resets @@ -77,8 +79,17 @@ if (data->__statep->__count != 0) \ { \ if (FROM_DIRECTION) \ - /* We don't use shift states in the FROM_DIRECTION. */ \ - data->__statep->__count = 0; \ + { \ + if (__builtin_expect (outbuf + 4 <= outend, 1)) \ + { \ + /* Write out the last character. */ \ + *((uint32_t *) outbuf)++ = data->__statep->__count >> 3; \ + data->__statep->__count = 0; \ + } \ + else \ + /* We don't have enough room in the output buffer. */ \ + status = __GCONV_FULL_OUTPUT; \ + } \ else \ { \ if (__builtin_expect (outbuf + 2 <= outend, 1)) \ @@ -104,104 +115,114 @@ #define LOOPFCT FROM_LOOP #define BODY \ { \ - uint32_t ch = *inptr; \ + uint32_t ch; \ \ - if (ch < 0x80) \ - /* Plain ASCII character. */ \ - ++inptr; \ - else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \ + /* Determine whether there is a buffered character pending. */ \ + ch = *statep >> 3; \ + if (__builtin_expect (ch == 0, 1)) \ { \ - /* Two or three byte character. */ \ - uint32_t ch2; \ + /* No - so look at the next input byte. */ \ + ch = *inptr; \ \ - if (__builtin_expect (inptr + 1 >= inend, 0)) \ + if (ch < 0x80) \ + /* Plain ASCII character. */ \ + ++inptr; \ + else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \ { \ - /* The second byte is not available. */ \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ - } \ + /* Two or three byte character. */ \ + uint32_t ch2; \ \ - ch2 = inptr[1]; \ + if (__builtin_expect (inptr + 1 >= inend, 0)) \ + { \ + /* The second byte is not available. */ \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ \ - /* The second byte must be >= 0xa1 and <= 0xfe. */ \ - if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \ - { \ - /* This is an illegal character. */ \ - STANDARD_FROM_LOOP_ERR_HANDLER (1); \ - } \ + ch2 = inptr[1]; \ \ - if (ch == 0x8e) \ - { \ - /* Half-width katakana. */ \ - if (__builtin_expect (ch2 > 0xdf, 0)) \ - STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + /* The second byte must be >= 0xa1 and <= 0xfe. */ \ + if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \ + { \ + /* This is an illegal character. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + } \ \ - ch = ch2 + 0xfec0; \ - inptr += 2; \ - } \ - else \ - { \ - const unsigned char *endp; \ + if (ch == 0x8e) \ + { \ + /* Half-width katakana. */ \ + if (__builtin_expect (ch2 > 0xdf, 0)) \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ \ - if (ch == 0x8f) \ + ch = ch2 + 0xfec0; \ + inptr += 2; \ + } \ + else \ { \ - /* JISX 0213 plane 2. */ \ - uint32_t ch3; \ + const unsigned char *endp; \ \ - if (__builtin_expect (inptr + 2 >= inend, 0)) \ + if (ch == 0x8f) \ { \ - /* The third byte is not available. */ \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ - } \ + /* JISX 0213 plane 2. */ \ + uint32_t ch3; \ \ - ch3 = inptr[2]; \ - endp = inptr + 3; \ + if (__builtin_expect (inptr + 2 >= inend, 0)) \ + { \ + /* The third byte is not available. */ \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ \ - ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \ - } \ - else \ - { \ - /* JISX 0213 plane 1. */ \ - endp = inptr + 2; \ + ch3 = inptr[2]; \ + endp = inptr + 3; \ \ - ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \ - } \ + ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \ + } \ + else \ + { \ + /* JISX 0213 plane 1. */ \ + endp = inptr + 2; \ \ - if (ch == 0) \ - /* This is an illegal character. */ \ - STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \ + } \ \ - if (ch < 0x80) \ - { \ - /* It's a combining character. */ \ - uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ - uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ + if (ch == 0) \ + /* This is an illegal character. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ \ - /* See whether we have room for two characters. */ \ - if (outptr + 8 <= outend) \ + inptr = endp; \ + \ + if (ch < 0x80) \ { \ - inptr = endp; \ + /* It's a combining character. */ \ + uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ + uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ + \ put32 (outptr, u1); \ outptr += 4; \ - put32 (outptr, u2); \ - outptr += 4; \ - continue; \ - } \ - else \ - { \ + \ + /* See whether we have room for two characters. */ \ + if (outptr + 4 <= outend) \ + { \ + put32 (outptr, u2); \ + outptr += 4; \ + continue; \ + } \ + \ + /* Otherwise store only the first character now, and \ + put the second one into the queue. */ \ + *statep = u2 << 3; \ + /* Tell the caller why we terminate the loop. */ \ result = __GCONV_FULL_OUTPUT; \ break; \ } \ } \ - \ - inptr = endp; \ } \ - } \ - else \ - { \ - /* This is illegal. */ \ - STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + else \ + { \ + /* This is illegal. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + } \ } \ \ put32 (outptr, ch); \ |