diff options
author | Ulrich Drepper <drepper@gmail.com> | 2012-01-08 07:19:21 -0500 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2012-01-08 07:19:21 -0500 |
commit | d3ed722566f42d3f614b1221a8e4f19092976531 (patch) | |
tree | 4a63e059ef599167cf407311188551fe72221d8d /iconv | |
parent | a0da5fe1e49b819b4d90b77915e21cddd397d064 (diff) | |
download | glibc-d3ed722566f42d3f614b1221a8e4f19092976531.zip glibc-d3ed722566f42d3f614b1221a8e4f19092976531.tar.gz glibc-d3ed722566f42d3f614b1221a8e4f19092976531.tar.bz2 |
Simplify char16_t implementation
Diffstat (limited to 'iconv')
-rw-r--r-- | iconv/gconv_builtin.h | 14 | ||||
-rw-r--r-- | iconv/gconv_int.h | 4 | ||||
-rw-r--r-- | iconv/gconv_simple.c | 388 | ||||
-rw-r--r-- | iconv/iconv_prog.c | 12 |
4 files changed, 4 insertions, 414 deletions
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h index 6820f82..271a4be 100644 --- a/iconv/gconv_builtin.h +++ b/iconv/gconv_builtin.h @@ -122,17 +122,3 @@ BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1, __gconv_transform_internal_ucs2reverse, NULL, 4, 4, 2, 2) #endif - - -BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "CHAR16", 1, "=ascii->CHAR16", - __gconv_transform_ascii_char16, NULL, 1, 1, 2, 4) - -BUILTIN_TRANSFORMATION ("CHAR16", "ANSI_X3.4-1968//", 1, "=CHAR16->ascii", - __gconv_transform_char16_ascii, NULL, 2, 4, 1, 1) - - -BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "CHAR16", 1, "=utf8->CHAR16", - __gconv_transform_utf8_char16, NULL, 1, 6, 2, 4) - -BUILTIN_TRANSFORMATION ("CHAR16", "ISO-10646/UTF8/", 1, "=CHAR16->utf8", - __gconv_transform_char16_utf8, NULL, 2, 4, 1, 6) diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 7508372..a2fcb93 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -303,10 +303,6 @@ __BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le); __BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal); __BUILTIN_TRANSFORM (__gconv_transform_internal_utf16); __BUILTIN_TRANSFORM (__gconv_transform_utf16_internal); -__BUILTIN_TRANSFORM (__gconv_transform_ascii_char16); -__BUILTIN_TRANSFORM (__gconv_transform_char16_ascii); -__BUILTIN_TRANSFORM (__gconv_transform_utf8_char16); -__BUILTIN_TRANSFORM (__gconv_transform_char16_utf8); # undef __BUITLIN_TRANSFORM /* Specialized conversion function for a single byte to INTERNAL, recognizing diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 028822c..6776160 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -1321,391 +1321,3 @@ ucs4le_internal_loop_single (struct __gconv_step *step, #define LOOP_NEED_FLAGS #include <iconv/loop.c> #include <iconv/skeleton.c> - - -/* Convert from ISO 646-IRV to the char16_t format. */ -#define DEFINE_INIT 0 -#define DEFINE_FINI 0 -#define MIN_NEEDED_FROM 1 -#define MIN_NEEDED_TO 2 -#define FROM_DIRECTION 1 -#define FROM_LOOP ascii_char16_loop -#define TO_LOOP ascii_char16_loop /* This is not used. */ -#define FUNCTION_NAME __gconv_transform_ascii_char16 -#define ONE_DIRECTION 1 - -#define MIN_NEEDED_INPUT MIN_NEEDED_FROM -#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO -#define LOOPFCT FROM_LOOP -#define BODY \ - { \ - if (__builtin_expect (*inptr > '\x7f', 0)) \ - { \ - /* The value is too large. We don't try transliteration here since \ - this is not an error because of the lack of possibilities to \ - represent the result. This is a genuine bug in the input since \ - ASCII does not allow such values. */ \ - STANDARD_FROM_LOOP_ERR_HANDLER (1); \ - } \ - else \ - { \ - /* It's an one byte sequence. */ \ - *((uint16_t *) outptr) = *inptr++; \ - outptr += sizeof (uint16_t); \ - } \ - } -#define LOOP_NEED_FLAGS -#include <iconv/loop.c> -#include <iconv/skeleton.c> - - -/* Convert from the char16_t format to ISO 646-IRV. */ -#define DEFINE_INIT 0 -#define DEFINE_FINI 0 -#define MIN_NEEDED_FROM 2 -#define MIN_NEEDED_TO 1 -#define FROM_DIRECTION 1 -#define FROM_LOOP char16_ascii_loop -#define TO_LOOP char16_ascii_loop /* This is not used. */ -#define FUNCTION_NAME __gconv_transform_char16_ascii -#define ONE_DIRECTION 1 - -#define MIN_NEEDED_INPUT MIN_NEEDED_FROM -#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO -#define LOOPFCT FROM_LOOP -#define BODY \ - { \ - if (__builtin_expect (*((const uint16_t *) inptr) > 0x7f, 0)) \ - { \ - UNICODE_TAG_HANDLER (*((const uint16_t *) inptr), 2); \ - STANDARD_TO_LOOP_ERR_HANDLER (2); \ - } \ - else \ - { \ - /* It's an one byte sequence. */ \ - *outptr++ = *((const uint16_t *) inptr); \ - inptr += 2; \ - } \ - } -#define LOOP_NEED_FLAGS -#include <iconv/loop.c> -#include <iconv/skeleton.c> - - -/* Convert from the char16_t format to UTF-8. */ -#define DEFINE_INIT 0 -#define DEFINE_FINI 0 -#define MIN_NEEDED_FROM 2 -#define MAX_NEEDED_FROM 4 -#define MIN_NEEDED_TO 1 -#define MAX_NEEDED_TO 6 -#define FROM_DIRECTION 1 -#define FROM_LOOP char16_utf8_loop -#define TO_LOOP char16_utf8_loop /* This is not used. */ -#define FUNCTION_NAME __gconv_transform_char16_utf8 -#define ONE_DIRECTION 1 - -#define MIN_NEEDED_INPUT MIN_NEEDED_FROM -#define MAX_NEEDED_INPUT MAX_NEEDED_FROM -#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO -#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO -#define LOOPFCT FROM_LOOP -#define BODY \ - { \ - /* Yes, reading a 16-bit number and storing it as 32-bit is correct. */ \ - uint32_t wc = *((const uint16_t *) inptr); \ - inptr += 2; \ - \ - if (__builtin_expect (wc < 0x80, 1)) \ - /* It's an one byte sequence. */ \ - *outptr++ = (unsigned char) wc; \ - else \ - { \ - size_t step; \ - \ - if (__builtin_expect (wc < 0xd800 || wc > 0xdfff, 1)) \ - step = wc < 0x800 ? 2 : 3; \ - else \ - { \ - if (__builtin_expect (inptr + 2 > inend, 0)) \ - { \ - /* We don't have enough input for another complete input \ - character. */ \ - inptr -= 2; \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ - } \ - \ - uint32_t sec = *((const uint16_t *) inptr); \ - if (__builtin_expect (sec < 0xdc00, 0) \ - || __builtin_expect (sec > 0xdfff, 0)) \ - { \ - /* This is no valid second word for a surrogate. */ \ - STANDARD_FROM_LOOP_ERR_HANDLER (2); \ - } \ - inptr += 2; \ - wc = ((wc - 0xd7c0) << 10) + (sec - 0xdc00); \ - \ - step = wc < 0x200000 ? 4 : 5; \ - } \ - \ - if (__builtin_expect (outptr + step > outend, 0)) \ - { \ - /* Too long. */ \ - result = __GCONV_FULL_OUTPUT; \ - inptr -= step >= 4 ? 4 : 2; \ - break; \ - } \ - \ - unsigned char *start = outptr; \ - *outptr = (unsigned char) (~0xff >> step); \ - outptr += step; \ - do \ - { \ - start[--step] = 0x80 | (wc & 0x3f); \ - wc >>= 6; \ - } \ - while (step > 1); \ - start[0] |= wc; \ - } \ - } -#define LOOP_NEED_FLAGS -#include <iconv/loop.c> -#include <iconv/skeleton.c> - - -/* Convert from UTF-8 to the char16_t format. */ -#define DEFINE_INIT 0 -#define DEFINE_FINI 0 -#define MIN_NEEDED_FROM 1 -#define MAX_NEEDED_FROM 6 -#define MIN_NEEDED_TO 2 -#define MAX_NEEDED_TO 4 -#define FROM_DIRECTION 1 -#define FROM_LOOP utf8_char16_loop -#define TO_LOOP utf8_char16_loop /* This is not used. */ -#define FUNCTION_NAME __gconv_transform_utf8_char16 -#define ONE_DIRECTION 1 - -#define MIN_NEEDED_INPUT MIN_NEEDED_FROM -#define MAX_NEEDED_INPUT MAX_NEEDED_FROM -#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO -#define LOOPFCT FROM_LOOP -#define BODY \ - { \ - /* Next input byte. */ \ - uint32_t ch = *inptr; \ - \ - if (__builtin_expect (ch < 0x80, 1)) \ - { \ - /* One byte sequence. */ \ - *((uint16_t *) outptr) = ch; \ - outptr += 2; \ - ++inptr; \ - } \ - else \ - { \ - uint_fast32_t cnt; \ - uint_fast32_t i; \ - \ - if (ch >= 0xc2 && ch < 0xe0) \ - { \ - /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \ - otherwise the wide character could have been represented \ - using a single byte. */ \ - cnt = 2; \ - ch &= 0x1f; \ - } \ - else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ - { \ - /* We expect three bytes. */ \ - cnt = 3; \ - ch &= 0x0f; \ - } \ - else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ - { \ - /* We expect four bytes. */ \ - cnt = 4; \ - ch &= 0x07; \ - } \ - else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ - { \ - /* We expect five bytes. */ \ - cnt = 5; \ - ch &= 0x03; \ - } \ - else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \ - { \ - /* We expect six bytes. */ \ - cnt = 6; \ - ch &= 0x01; \ - } \ - else \ - { \ - /* Search the end of this ill-formed UTF-8 character. This \ - is the next byte with (x & 0xc0) != 0x80. */ \ - i = 0; \ - do \ - ++i; \ - while (inptr + i < inend \ - && (*(inptr + i) & 0xc0) == 0x80 \ - && i < 5); \ - \ - errout: \ - STANDARD_FROM_LOOP_ERR_HANDLER (i); \ - } \ - \ - if (__builtin_expect (inptr + cnt > inend, 0)) \ - { \ - /* We don't have enough input. But before we report that check \ - that all the bytes are correct. */ \ - for (i = 1; inptr + i < inend; ++i) \ - if ((inptr[i] & 0xc0) != 0x80) \ - break; \ - \ - if (__builtin_expect (inptr + i == inend, 1)) \ - { \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ - } \ - \ - goto errout; \ - } \ - \ - /* Read the possible remaining bytes. */ \ - for (i = 1; i < cnt; ++i) \ - { \ - uint32_t byte = inptr[i]; \ - \ - if ((byte & 0xc0) != 0x80) \ - /* This is an illegal encoding. */ \ - break; \ - \ - ch <<= 6; \ - ch |= byte & 0x3f; \ - } \ - \ - /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ - If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ - have been represented with fewer than cnt bytes. */ \ - if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ - /* Do not accept UTF-16 surrogates. */ \ - || (ch >= 0xd800 && ch <= 0xdfff)) \ - { \ - /* This is an illegal encoding. */ \ - goto errout; \ - } \ - \ - /* Now adjust the pointers and store the result. */ \ - if (ch < 0x10000) \ - *((uint16_t *) outptr) = ch; \ - else \ - { \ - if (__builtin_expect (outptr + 4 > outend, 0)) \ - { \ - result = __GCONV_FULL_OUTPUT; \ - break; \ - } \ - \ - *((uint16_t *) outptr) = 0xd7c0 + (ch >> 10); \ - outptr += 2; \ - *((uint16_t *) outptr) = 0xdc00 + (ch & 0x3ff); \ - } \ - \ - outptr += 2; \ - inptr += cnt; \ - } \ - } -#define LOOP_NEED_FLAGS - -#define STORE_REST \ - { \ - /* We store the remaining bytes while converting them into the UCS4 \ - format. We can assume that the first byte in the buffer is \ - correct and that it requires a larger number of bytes than there \ - are in the input buffer. */ \ - wint_t ch = **inptrp; \ - size_t cnt, r; \ - \ - state->__count = inend - *inptrp; \ - \ - assert (ch != 0xc0 && ch != 0xc1); \ - if (ch >= 0xc2 && ch < 0xe0) \ - { \ - /* We expect two bytes. The first byte cannot be 0xc0 or \ - 0xc1, otherwise the wide character could have been \ - represented using a single byte. */ \ - cnt = 2; \ - ch &= 0x1f; \ - } \ - else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ - { \ - /* We expect three bytes. */ \ - cnt = 3; \ - ch &= 0x0f; \ - } \ - else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ - { \ - /* We expect four bytes. */ \ - cnt = 4; \ - ch &= 0x07; \ - } \ - else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ - { \ - /* We expect five bytes. */ \ - cnt = 5; \ - ch &= 0x03; \ - } \ - else \ - { \ - /* We expect six bytes. */ \ - cnt = 6; \ - ch &= 0x01; \ - } \ - \ - /* The first byte is already consumed. */ \ - r = cnt - 1; \ - while (++(*inptrp) < inend) \ - { \ - ch <<= 6; \ - ch |= **inptrp & 0x3f; \ - --r; \ - } \ - \ - /* Shift for the so far missing bytes. */ \ - ch <<= r * 6; \ - \ - /* Store the number of bytes expected for the entire sequence. */ \ - state->__count |= cnt << 8; \ - \ - /* Store the value. */ \ - state->__value.__wch = ch; \ - } - -#define UNPACK_BYTES \ - { \ - static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \ - wint_t wch = state->__value.__wch; \ - size_t ntotal = state->__count >> 8; \ - \ - inlen = state->__count & 255; \ - \ - bytebuf[0] = inmask[ntotal - 2]; \ - \ - do \ - { \ - if (--ntotal < inlen) \ - bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ - wch >>= 6; \ - } \ - while (ntotal > 1); \ - \ - bytebuf[0] |= wch; \ - } - -#define CLEAR_STATE \ - state->__count = 0 - - -#include <iconv/loop.c> -#include <iconv/skeleton.c> diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 13facc8..0d62a07 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -719,12 +719,9 @@ add_known_names (struct gconv_module *node) add_known_names (node->right); do { - if (strcmp (node->from_string, "INTERNAL") != 0 - && strcmp (node->from_string, "CHAR16") != 0) - tsearch (node->from_string, &printlist, - (__compar_fn_t) strverscmp); - if (strcmp (node->to_string, "INTERNAL") != 0 - && strcmp (node->to_string, "CHAR16") != 0) + if (strcmp (node->from_string, "INTERNAL") != 0) + tsearch (node->from_string, &printlist, (__compar_fn_t) strverscmp); + if (strcmp (node->to_string, "INTERNAL") != 0) tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp); node = node->same; @@ -750,8 +747,7 @@ insert_cache (void) { const char *str = strtab + hashtab[cnt].string_offset; - if (strcmp (str, "INTERNAL") != 0 - && strcmp (str, "CHAR16") != 0) + if (strcmp (str, "INTERNAL") != 0) tsearch (str, &printlist, (__compar_fn_t) strverscmp); } } |