diff options
39 files changed, 565 insertions, 176 deletions
@@ -1,8 +1,73 @@ +1998-04-15 16:41 Ulrich Drepper <drepper@cygnus.com> + + Don't name internal representation since it might be different from + the external form (namely on little endian machines). + * iconv/gconv_builtin.h: Add UCS4 support. Change references to + UCS4 into references to INTERNAL. + * iconv/gconv_simple.c: Implement UCS4<->INTERNAL converters. + Add endianness support to UCS functions. Change references to + UCS4 into references to INTERNAL. + * iconv/gconv_int.h: Change references to UCS4 into references to + INTERNAL. + * iconv/iconv_prog.c: Don't mention INTERNAL in --list output. + * iconvdata/gconv-modules: Change accordingly. + * wcsmbs/wcsmbsload.c: Change names to use INTERNAL. + + * iconv/gconv_simple.c: Adjust input buffer pointer for output buffer + overflow. + * iconvdata/8bit-gap.c: Likewise. + * iconvdata/8bit-generic.c: Likewise. + * iconvdata/big5.c: Likewise. + * iconvdata/euccn.c: Likewise. + * iconvdata/eucjp.c: Likewise. + * iconvdata/euckr.c: Likewise. + * iconvdata/euctw.c: Likewise. + * iconvdata/iso646.c: Likewise. + * iconvdata/iso6937.c: Likewise. + * iconvdata/iso8859-1.c: Likewise. + * iconvdata/johab.c: Likewise. + * iconvdata/sjis.c: Likewise. + * iconvdata/t61.c: Likewise. + * iconvdata/uhc.c: Likewise. + + * iconvdata/8bit-gap.c: Correct access to to_ucs4 array. + * iconvdata/8bit-generic.c: Likewise. + + * iconvdata/TESTS: Add more tests. + + * sysdeps/i386/bits/byteswap.h: Change to use "=r" when ror is used. + +1998-04-15 11:47 Ulrich Drepper <drepper@cygnus.com> + + * iconvdata/Makefile: Better rules to run tests. + + * iconvdata/testdata/ISO-8859-1..UTF8: New file. + * iconvdata/testdata/ISO-8859-10: Likewise. + * iconvdata/testdata/ISO-8859-10..UCS2: Likewise. + * iconvdata/testdata/ISO-8859-2: Likewise. + * iconvdata/testdata/ISO-8859-2..UCS4: Likewise. + * iconvdata/testdata/ISO-8859-2..UTF8: Likewise. + * iconvdata/testdata/ISO-8859-3: Likewise. + * iconvdata/testdata/ISO-8859-4: Likewise. 
+ * iconvdata/testdata/ISO-8859-5: Likewise. + * iconvdata/testdata/ISO-8859-6: Likewise. + * iconvdata/testdata/ISO-8859-7: Likewise. + * iconvdata/testdata/ISO-8859-8: Likewise. + * iconvdata/testdata/ISO-8859-9: Likewise. + 1998-04-15 Ulrich Drepper <drepper@cygnus.com> + * iconvdata/run-iconv-test.sh: Handle $from..$t file to compare + intermediate result (if available). + * iconv/gconv_simple.c (__gconv_transform_ucs4_ascii): Fix typo in last change. + * iconvdata/Makefile: Add rules to run run-iconv-test.sh. + (distribute): Add run-iconv-test.sh and testdata/*. + + * stdlib/testmb.c (main): Simplify mbc array handling. + 1998-04-14 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * elf/dl-minimal.c (__strtol_internal): Correct range check. Fix @@ -27,6 +92,7 @@ * iconvdata/Makefile: Add rules to run tests. * iconvdata/TESTS: New file. * iconvdata/run-iconv-test.sh: New file. + * iconvdata/testdata/ISO-8859-1: New file. * iconv/iconv_prog.c (main): Call process_block with OUTPUT stream, not stdout. diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h index 9c98c35..265dca1 100644 --- a/iconv/gconv_builtin.h +++ b/iconv/gconv_builtin.h @@ -18,27 +18,41 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +BUILTIN_ALIAS ("UCS4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("UCS-4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("ISO-10646//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("10646-1:1993//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("10646-1:1993/UCS4/", "ISO-10646/UCS4/") + +BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, + "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4", + __gconv_transform_internal_ucs4, NULL, NULL) +BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, + "INTERNAL", 1, "=ucs4->INTERNAL", + __gconv_transform_internal_ucs4, NULL, NULL) +/* Please note that we need only one function for both direction. 
*/ + BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/") BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/") -BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, - "ISO-10646/UTF8/", 1, "=ucs4->utf8", - __gconv_transform_ucs4_utf8, NULL, NULL) +BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, + "ISO-10646/UTF8/", 1, "=INTERNAL->utf8", + __gconv_transform_internal_utf8, NULL, NULL) BUILTIN_TRANSFORMATION ("ISO-10646/UTF-?8/", "ISO-10646/UTF", 13, - "ISO-10646/UCS4/", 1, "=utf8->ucs4", - __gconv_transform_utf8_ucs4, NULL, NULL) + "INTERNAL", 1, "=utf8->INTERNAL", + __gconv_transform_utf8_internal, NULL, NULL) BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/") BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/") -BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "ISO-10646/UCS4/", - 1, "=ucs2->ucs4", - __gconv_transform_ucs2_ucs4, NULL, NULL) +BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "INTERNAL", + 1, "=ucs2->INTERNAL", + __gconv_transform_ucs2_internal, NULL, NULL) -BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, "ISO-10646/UCS2/", - 1, "=ucs4->ucs2", - __gconv_transform_ucs4_ucs2, NULL, NULL) +BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "ISO-10646/UCS2/", + 1, "=INTERNAL->ucs2", + __gconv_transform_internal_ucs2, NULL, NULL) BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy", __gconv_transform_dummy, NULL, NULL) diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 35ec31a..a1475f8 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -153,12 +153,13 @@ extern void __gconv_get_builtin_trans (const char *__name, int __do_flush) __BUILTIN_TRANS (__gconv_transform_dummy); -__BUILTIN_TRANS (__gconv_transform_ascii_ucs4); -__BUILTIN_TRANS (__gconv_transform_ucs4_ascii); -__BUILTIN_TRANS (__gconv_transform_ucs4_utf8); -__BUILTIN_TRANS (__gconv_transform_utf8_ucs4); -__BUILTIN_TRANS (__gconv_transform_ucs2_ucs4); -__BUILTIN_TRANS (__gconv_transform_ucs4_ucs2); +__BUILTIN_TRANS (__gconv_transform_ascii_internal); +__BUILTIN_TRANS (__gconv_transform_internal_ascii); 
+__BUILTIN_TRANS (__gconv_transform_utf8_internal); +__BUILTIN_TRANS (__gconv_transform_internal_utf8); +__BUILTIN_TRANS (__gconv_transform_ucs2_internal); +__BUILTIN_TRANS (__gconv_transform_internal_ucs2); +__BUILTIN_TRANS (__gconv_transform_internal_ucs4); # undef __BUITLIN_TRANS #endif diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 38b6b56..b72e61e 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -18,6 +18,8 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <byteswap.h> +#include <endian.h> #include <errno.h> #include <gconv.h> #include <stdint.h> @@ -76,16 +78,21 @@ __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data, } -/* Convert from ISO 646-IRV to ISO 10646/UCS4. */ +/* Transform from the internal, UCS4-like format, to UCS4. The + difference between the internal ucs4 format and the real UCS4 + format is, if any, the endianess. The Unicode/ISO 10646 says that + unless some higher protocol specifies it differently, the byte + order is big endian.*/ int -__gconv_transform_ascii_ucs4 (struct gconv_step *step, - struct gconv_step_data *data, const char *inbuf, - size_t *inlen, size_t *written, int do_flush) +__gconv_transform_internal_ucs4 (struct gconv_step *step, + struct gconv_step_data *data, + const char *inbuf, size_t *inlen, + size_t *written, int do_flush) { struct gconv_step *next_step = step + 1; struct gconv_step_data *next_data = data + 1; gconv_fct fct = next_step->fct; - size_t do_write; + size_t do_write = 0; int result; /* If the function is called with no input this means we have to reset @@ -95,7 +102,6 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step, { /* Clear the state. */ memset (data->statep, '\0', sizeof (mbstate_t)); - do_write = 0; /* Call the steps down the chain if there are any. 
*/ if (data->is_last) @@ -114,12 +120,126 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step, else { int save_errno = errno; - do_write = 0; result = GCONV_OK; do { - const unsigned char *newinbuf = inbuf; + size_t n_convert = (MIN (*inlen, + (data->outbufsize - data->outbufavail)) + / sizeof (wchar_t)); + +#if __BYTE_ORDER == __LITTLE_ENDIAN + /* Sigh, we have to do some real work. */ + wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail]; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt) + outbuf[cnt] = bswap_32 (((wchar_t *) inbuf)[cnt]); + +#elif __BYTE_ORDER == __BIG_ENDIAN + /* Simply copy the data. */ + memcpy (&data->outbuf[data->outbufsize], inbuf, + n_convert * sizeof (wchar_t)); +#else +# error "This endianess is not supported." +#endif + + *inlen -= n_convert * sizeof (wchar_t); + inbuf += n_convert * sizeof (wchar_t); + data->outbufavail += n_convert * sizeof (wchar_t); + do_write += n_convert; + + if (*inlen > 0 && *inlen < sizeof (wchar_t)) + { + /* We have an incomplete character at the end. */ + result = GCONV_INCOMPLETE_INPUT; + break; + } + + if (data->is_last) + { + /* This is the last step. */ + result = (*inlen < sizeof (wchar_t) + ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT); + break; + } + + /* Status so far. */ + result = GCONV_EMPTY_INPUT; + + if (data->outbufavail > 0) + { + /* Call the functions below in the chain. */ + size_t newavail = data->outbufavail; + + result = (*fct) (next_step, next_data, data->outbuf, &newavail, + written, 0); + + /* Correct the output buffer. */ + if (newavail != data->outbufavail && newavail > 0) + { + memmove (data->outbuf, + &data->outbuf[data->outbufavail - newavail], + newavail); + data->outbufavail = newavail; + } + } + } + while (*inlen >= sizeof (wchar_t) && result == GCONV_EMPTY_INPUT); + + __set_errno (save_errno); + } + + if (written != NULL && data->is_last) + *written = do_write; + + return result; +} + + +/* Convert from ISO 646-IRV to the internal (UCS4-like) format. 
*/ +int +__gconv_transform_ascii_internal (struct gconv_step *step, + struct gconv_step_data *data, + const char *inbuf, size_t *inlen, + size_t *written, int do_flush) +{ + struct gconv_step *next_step = step + 1; + struct gconv_step_data *next_data = data + 1; + gconv_fct fct = next_step->fct; + size_t do_write = 0; + int result; + + /* If the function is called with no input this means we have to reset + to the initial state. The possibly partly converted input is + dropped. */ + if (do_flush) + { + /* Clear the state. */ + memset (data->statep, '\0', sizeof (mbstate_t)); + + /* Call the steps down the chain if there are any. */ + if (data->is_last) + result = GCONV_OK; + else + { + struct gconv_step *next_step = step + 1; + struct gconv_step_data *next_data = data + 1; + + result = (*fct) (next_step, next_data, NULL, 0, written, 1); + + /* Clear output buffer. */ + data->outbufavail = 0; + } + } + else + { + const unsigned char *newinbuf = inbuf; + int save_errno = errno; + + result = GCONV_OK; + do + { size_t actually = 0; size_t cnt = 0; @@ -193,9 +313,10 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step, /* Convert from ISO 10646/UCS to ISO 646-IRV. 
*/ int -__gconv_transform_ucs4_ascii (struct gconv_step *step, - struct gconv_step_data *data, const char *inbuf, - size_t *inlen, size_t *written, int do_flush) +__gconv_transform_internal_ascii (struct gconv_step *step, + struct gconv_step_data *data, + const char *inbuf, size_t *inlen, + size_t *written, int do_flush) { struct gconv_step *next_step = step + 1; struct gconv_step_data *next_data = data + 1; @@ -228,13 +349,13 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step, } else { + const wchar_t *newinbuf = (const wchar_t *) inbuf; int save_errno = errno; do_write = 0; result = GCONV_OK; do { - const wchar_t *newinbuf = (const wchar_t *) inbuf; size_t actually = 0; size_t cnt = 0; @@ -264,11 +385,18 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step, if (result != GCONV_OK) break; + /* Check for incomplete input. */ + if (*inlen > 0 && *inlen < sizeof (wchar_t)) + { + /* We have an incomplete character at the end. */ + result = GCONV_INCOMPLETE_INPUT; + break; + } + if (data->is_last) { /* This is the last step. */ - result = (*inlen < sizeof (wchar_t) - ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT); + result = *inlen == 0 ? 
GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT; break; } @@ -306,9 +434,10 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step, int -__gconv_transform_ucs4_utf8 (struct gconv_step *step, - struct gconv_step_data *data, const char *inbuf, - size_t *inlen, size_t *written, int do_flush) +__gconv_transform_internal_utf8 (struct gconv_step *step, + struct gconv_step_data *data, + const char *inbuf, size_t *inlen, + size_t *written, int do_flush) { struct gconv_step *next_step = step + 1; struct gconv_step_data *next_data = data + 1; @@ -341,13 +470,13 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step, } else { + const wchar_t *newinbuf = (const wchar_t *) inbuf; int save_errno = errno; do_write = 0; result = GCONV_OK; do { - const wchar_t *newinbuf = (const wchar_t *) inbuf; size_t cnt = 0; while (data->outbufavail < data->outbufsize @@ -397,16 +526,24 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step, /* Remember how much we converted. */ do_write += cnt; *inlen -= cnt * sizeof (wchar_t); + newinbuf += cnt; /* Check whether an illegal character appeared. */ if (result != GCONV_OK) break; + /* Check for incomplete input. */ + if (*inlen > 0 && *inlen < sizeof (wchar_t)) + { + /* We have an incomplete character at the end. */ + result = GCONV_INCOMPLETE_INPUT; + break; + } + if (data->is_last) { /* This is the last step. */ - result = (*inlen < sizeof (wchar_t) - ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT); + result = *inlen == 0 ? 
GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT; break; } @@ -444,9 +581,10 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step, int -__gconv_transform_utf8_ucs4 (struct gconv_step *step, - struct gconv_step_data *data, const char *inbuf, - size_t *inlen, size_t *written, int do_flush) +__gconv_transform_utf8_internal (struct gconv_step *step, + struct gconv_step_data *data, + const char *inbuf, size_t *inlen, + size_t *written, int do_flush) { struct gconv_step *next_step = step + 1; struct gconv_step_data *next_data = data + 1; @@ -578,6 +716,7 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step, /* Remember how much we converted. */ do_write += actually; *inlen -= cnt; + inbuf += cnt; data->outbufavail += actually * sizeof (wchar_t); @@ -588,7 +727,7 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step, break; } - if (*inlen < extra) + if (*inlen > 0 && *inlen < extra) { /* We have an incomplete character at the end. */ result = GCONV_INCOMPLETE_INPUT; @@ -637,9 +776,10 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step, int -__gconv_transform_ucs2_ucs4 (struct gconv_step *step, - struct gconv_step_data *data, const char *inbuf, - size_t *inlen, size_t *written, int do_flush) +__gconv_transform_ucs2_internal (struct gconv_step *step, + struct gconv_step_data *data, + const char *inbuf, size_t *inlen, + size_t *written, int do_flush) { struct gconv_step *next_step = step + 1; struct gconv_step_data *next_data = data + 1; @@ -669,12 +809,12 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step, } else { + const uint16_t *newinbuf = (const uint16_t *) inbuf; int save_errno = errno; do_write = 0; do { - const uint16_t *newinbuf = (const uint16_t *) inbuf; wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail]; size_t actually = 0; @@ -683,34 +823,29 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step, while (data->outbufavail + 4 <= data->outbufsize && *inlen >= 2) { - outbuf[actually++] = *newinbuf++; +#if __BYTE_ORDER == __LITTLE_ENDIAN + 
outbuf[actually++] = (wchar_t) bswap_16 (*newinbuf++); +#else + outbuf[actually++] = (wchar_t) *newinbuf++; +#endif data->outbufavail += 4; *inlen -= 2; } - if (*inlen != 1) - { - /* We have an incomplete input character. */ - mbstate_t *state = data->statep; - state->count = 1; - state->value = *(uint8_t *) newinbuf; - --*inlen; - } - /* Remember how much we converted. */ do_write += actually * sizeof (wchar_t); - /* Check whether an illegal character appeared. */ - if (errno != 0) + if (*inlen == 1) { - result = GCONV_ILLEGAL_INPUT; + /* We have an incomplete character at the end. */ + result = GCONV_INCOMPLETE_INPUT; break; } - if (*inlen == 0 && !__mbsinit (data->statep)) + /* Check whether an illegal character appeared. */ + if (errno != 0) { - /* We have an incomplete character at the end. */ - result = GCONV_INCOMPLETE_INPUT; + result = GCONV_ILLEGAL_INPUT; break; } @@ -756,9 +891,10 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step, int -__gconv_transform_ucs4_ucs2 (struct gconv_step *step, - struct gconv_step_data *data, const char *inbuf, - size_t *inlen, size_t *written, int do_flush) +__gconv_transform_internal_ucs2 (struct gconv_step *step, + struct gconv_step_data *data, + const char *inbuf, size_t *inlen, + size_t *written, int do_flush) { struct gconv_step *next_step = step + 1; struct gconv_step_data *next_data = data + 1; @@ -791,12 +927,12 @@ __gconv_transform_ucs4_ucs2 (struct gconv_step *step, } else { + const wchar_t *newinbuf = (const wchar_t *) inbuf; int save_errno = errno; do_write = 0; do { - const wchar_t *newinbuf = (const wchar_t *) inbuf; uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail]; size_t actually = 0; @@ -810,39 +946,33 @@ __gconv_transform_ucs4_ucs2 (struct gconv_step *step, __set_errno (EILSEQ); break; } - outbuf[actually++] = (wchar_t) *newinbuf; +#if __BYTE_ORDER == __LITTLE_ENDIAN + /* Please note that we use the `uint32_t' pointer as a + `uint16_t' pointer which works since we are on a + little 
endian machine. */ + outbuf[actually++] = bswap_16 (*((uint16_t *) newinbuf)); + ++newinbuf; +#else + outbuf[actually++] = *newinbuf++; +#endif *inlen -= 4; data->outbufavail += 2; } - if (*inlen < 4) - { - /* We have an incomplete input character. */ - mbstate_t *state = data->statep; - state->count = *inlen; - state->value = 0; - while (*inlen > 0) - { - state->value <<= 8; - state->value += *(uint8_t *) newinbuf; - --*inlen; - } - } - /* Remember how much we converted. */ do_write += (const char *) newinbuf - inbuf; - /* Check whether an illegal character appeared. */ - if (errno != 0) + if (*inlen > 0 && *inlen < 4) { - result = GCONV_ILLEGAL_INPUT; + /* We have an incomplete input character. */ + result = GCONV_INCOMPLETE_INPUT; break; } - if (*inlen == 0 && !__mbsinit (data->statep)) + /* Check whether an illegal character appeared. */ + if (errno != 0) { - /* We have an incomplete character at the end. */ - result = GCONV_INCOMPLETE_INPUT; + result = GCONV_ILLEGAL_INPUT; break; } diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 0c1b9d0..569bd3b 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -509,14 +509,17 @@ print_known_names (void) { if (__gconv_modules_db[cnt]->from_pattern == NULL) { - tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist, - (__compar_fn_t) strcoll); - tsearch (__gconv_modules_db[cnt]->to_string, &printlist, - (__compar_fn_t) strcoll); + if (strcmp (__gconv_modules_db[cnt]->from_constpfx, "INTERNAL")) + tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist, + (__compar_fn_t) strcoll); + if (strcmp (__gconv_modules_db[cnt]->to_string, "INTERNAL")) + tsearch (__gconv_modules_db[cnt]->to_string, &printlist, + (__compar_fn_t) strcoll); } else - tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist, - (__compar_fn_t) strcoll); + if (strcmp (__gconv_modules_db[cnt]->from_pattern, "INTERNAL")) + tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist, + (__compar_fn_t) strcoll); } fputs (_("\ diff 
--git a/iconvdata/8bit-gap.c b/iconvdata/8bit-gap.c index 6c78ce5..a8d3c99 100644 --- a/iconvdata/8bit-gap.c +++ b/iconvdata/8bit-gap.c @@ -110,7 +110,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, while (cnt < inchars && (outwchars + sizeof (wchar_t) <= data->outbufsize)) { - wchar_t ch = to_ucs4[(unsigned int) inbuf[cnt]]; + wchar_t ch = to_ucs4[((unsigned char *) inbuf)[cnt]]; if (ch == L'\0' && inbuf[cnt] != '\0') { @@ -125,6 +125,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -158,6 +159,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars < data->outbufsize) diff --git a/iconvdata/8bit-generic.c b/iconvdata/8bit-generic.c index 52cd540..19194ad 100644 --- a/iconvdata/8bit-generic.c +++ b/iconvdata/8bit-generic.c @@ -97,7 +97,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, while (cnt < inchars && (outwchars + sizeof (wchar_t) <= data->outbufsize)) { - wchar_t ch = to_ucs4[(unsigned int) inbuf[cnt]]; + wchar_t ch = to_ucs4[((unsigned char *) inbuf)[cnt]]; if (ch == L'\0' && inbuf[cnt] != '\0') { @@ -112,6 +112,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -136,6 +137,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars < data->outbufsize) diff --git a/iconvdata/Makefile b/iconvdata/Makefile index 48d4495..dd1c391 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -94,7 +94,8 @@ distribute := 8bit-generic.c 8bit-gap.c gap.pl gaptab.pl gconv-modules \ ebcdic-at-de-a.c ebcdic-ca-fr.c jis0201.c jis0208.c jis0212.c \ extra-module.mk euckr.c johab.c uhc.c ksc5601.c ksc5601.h \ 
iso646.c big5.c eucjp.c gb2312.c gb2312.h euccn.c euctw.c \ - cns11643l1.c cns11643l1.h cns11643.h cns11643.c + cns11643l1.c cns11643l1.h cns11643.h cns11643.c \ + run-iconv-test.sh $(wildcard testdata/*) # We build the transformation modules only when we build shared libs. ifeq (yes,$(build-shared)) @@ -208,3 +209,14 @@ $(inst_gconvdir)/gconv-modules: gconv-modules $(+force) endif include ../Rules + +.PHONY: do-iconv-test +tests: do-iconv-test + +do-iconv-test: run-iconv-test.sh $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) \ + $(common-objdir)/iconv/iconv_prog + $(SHELL) -e $< $(common-objdir) > $(objpfx)iconv-test.out + +$(objpfx)gconv-modules: gconv-modules + cp $^ $@ diff --git a/iconvdata/TESTS b/iconvdata/TESTS index 809104d..41d7edc 100644 --- a/iconvdata/TESTS +++ b/iconvdata/TESTS @@ -28,3 +28,12 @@ # N. target coded character set. ISO-8859-1 ISO-8859-1 UTF8 +ISO-8859-2 ISO-8859-2 UCS4 UTF8 +ISO-8859-3 ISO-8859-3 UTF8 +ISO-8859-4 ISO-8859-4 UTF8 +ISO-8859-5 ISO-8859-5 UTF8 +ISO-8859-6 ISO-8859-6 UTF8 +ISO-8859-7 ISO-8859-7 UTF8 +ISO-8859-8 ISO-8859-8 UTF8 +ISO-8859-9 ISO-8859-9 UTF8 +ISO-8859-10 ISO-8859-10 UCS2 UTF8 diff --git a/iconvdata/big5.c b/iconvdata/big5.c index bfd14fd..a6a2580 100644 --- a/iconvdata/big5.c +++ b/iconvdata/big5.c @@ -8540,6 +8540,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -8632,6 +8633,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/euccn.c b/iconvdata/euccn.c index e8d9097..f683836f 100644 --- a/iconvdata/euccn.c +++ b/iconvdata/euccn.c @@ -163,6 +163,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -212,6 +213,7 @@ gconv (struct 
gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/eucjp.c b/iconvdata/eucjp.c index 4e82904..e6a71cc 100644 --- a/iconvdata/eucjp.c +++ b/iconvdata/eucjp.c @@ -180,6 +180,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -256,6 +257,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/euckr.c b/iconvdata/euckr.c index 1dfa42d..2ad9478 100644 --- a/iconvdata/euckr.c +++ b/iconvdata/euckr.c @@ -182,6 +182,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -226,6 +227,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/euctw.c b/iconvdata/euctw.c index f38db47..fd422c1 100644 --- a/iconvdata/euctw.c +++ b/iconvdata/euctw.c @@ -180,6 +180,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -252,6 +253,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/gconv-modules b/iconvdata/gconv-modules index ffdd8c3..d5109dc 100644 --- a/iconvdata/gconv-modules +++ b/iconvdata/gconv-modules @@ -34,11 +34,6 @@ # name: the real name of the character set # from to module cost -alias ISO-10646// ISO-10646/UCS4/ -alias 10646-1:1993// 
ISO-10646/UCS4/ -alias 10646-1:1993/UCS4/ ISO-10646/UCS4/ - -# from to module cost alias ISO-IR-6// ANSI_X3.4-1968// alias ANSI_X3.4-1986// ANSI_X3.4-1968// alias ISO_646.IRV:1991// ANSI_X3.4-1968// @@ -48,15 +43,15 @@ alias US-ASCII// ANSI_X3.4-1968// alias US// ANSI_X3.4-1968// alias IBM367// ANSI_X3.4-1968// alias CP367// ANSI_X3.4-1968// -module ANSI_X3.4-1968// ISO-10646/UCS4/ ISO646 2 -module ISO-10646/UCS4/ ANSI_X3.4-1968// ISO646 2 +module ANSI_X3.4-1968// INTERNAL ISO646 2 +module INTERNAL ANSI_X3.4-1968// ISO646 2 alias ISO-IR-4// BS_4730// alias ISO646-GB// BS_4730// alias GB// BS_4730// alias UK// BS_4730// -module BS_4730// ISO-10646/UCS4/ ISO646 2 -module ISO-10646/UCS4/ BS_4730// ISO646 2 +module BS_4730// INTERNAL ISO646 2 +module INTERNAL BS_4730// ISO646 2 # from to module cost alias ISO-IR-100// ISO-8859-1// @@ -66,8 +61,8 @@ alias LATIN1// ISO-8859-1// alias L1// ISO-8859-1// alias IBM819// ISO-8859-1// alias CP819// ISO-8859-1// -module ISO-8859-1// ISO-10646/UCS4/ ISO8859-1 1 -module ISO-10646/UCS4/ ISO-8859-1// ISO8859-1 1 +module ISO-8859-1// INTERNAL ISO8859-1 1 +module INTERNAL ISO-8859-1// ISO8859-1 1 # from to module cost alias ISO-IR-101// ISO-8859-2// @@ -75,8 +70,8 @@ alias ISO_8859-2:1987// ISO-8859-2// alias ISO_8859-2// ISO-8859-2// alias LATIN2// ISO-8859-2// alias L2// ISO-8859-2// -module ISO-8859-2// ISO-10646/UCS4/ ISO8859-2 1 -module ISO-10646/UCS4/ ISO-8859-2// ISO8859-2 1 +module ISO-8859-2// INTERNAL ISO8859-2 1 +module INTERNAL ISO-8859-2// ISO8859-2 1 # from to module cost alias ISO-IR-109// ISO-8859-3// @@ -84,8 +79,8 @@ alias ISO_8859-3:1988// ISO-8859-3// alias ISO_8859-3// ISO-8859-3// alias LATIN3// ISO-8859-3// alias L3// ISO-8859-3// -module ISO-8859-3// ISO-10646/UCS4/ ISO8859-3 1 -module ISO-10646/UCS4/ ISO-8859-3// ISO8859-3 1 +module ISO-8859-3// INTERNAL ISO8859-3 1 +module INTERNAL ISO-8859-3// ISO8859-3 1 # from to module cost alias ISO-IR-110// ISO-8859-4// @@ -93,16 +88,16 @@ alias ISO_8859-4:1988// 
ISO-8859-4// alias ISO_8859-4// ISO-8859-4// alias LATIN4// ISO-8859-4// alias L4// ISO-8859-4// -module ISO-8859-4// ISO-10646/UCS4/ ISO8859-4 1 -module ISO-10646/UCS4/ ISO-8859-4// ISO8859-4 1 +module ISO-8859-4// INTERNAL ISO8859-4 1 +module INTERNAL ISO-8859-4// ISO8859-4 1 # from to module cost alias ISO-IR-144// ISO-8859-5// alias ISO_8859-5:1988// ISO-8859-5// alias ISO_8859-5// ISO-8859-5// alias CYRILLIC// ISO-8859-5// -module ISO-8859-5// ISO-10646/UCS4/ ISO8859-5 1 -module ISO-10646/UCS4/ ISO-8859-5// ISO8859-5 1 +module ISO-8859-5// INTERNAL ISO8859-5 1 +module INTERNAL ISO-8859-5// ISO8859-5 1 # from to module cost alias ISO-IR-127// ISO-8859-6// @@ -111,8 +106,8 @@ alias ISO_8859-6// ISO-8859-6// alias ECMA-114// ISO-8859-6// alias ASMO-708// ISO-8859-6// alias ARABIC// ISO-8859-6// -module ISO-8859-6// ISO-10646/UCS4/ ISO8859-6 1 -module ISO-10646/UCS4/ ISO-8859-6// ISO8859-6 1 +module ISO-8859-6// INTERNAL ISO8859-6 1 +module INTERNAL ISO-8859-6// ISO8859-6 1 # from to module cost alias ISO-IR-126// ISO-8859-7// @@ -122,16 +117,16 @@ alias ELOT_928// ISO-8859-7// alias ECMA-118// ISO-8859-7// alias GREEK// ISO-8859-7// alias GREEK8// ISO-8859-7// -module ISO-8859-7// ISO-10646/UCS4/ ISO8859-7 1 -module ISO-10646/UCS4/ ISO-8859-7// ISO8859-7 1 +module ISO-8859-7// INTERNAL ISO8859-7 1 +module INTERNAL ISO-8859-7// ISO8859-7 1 # from to module cost alias ISO-IR-138// ISO-8859-8// alias ISO_8859-8:1988// ISO-8859-8// alias ISO_8859-8// ISO-8859-8// alias HEBREW// ISO-8859-8// -module ISO-8859-8// ISO-10646/UCS4/ ISO8859-8 1 -module ISO-10646/UCS4/ ISO-8859-8// ISO8859-8 1 +module ISO-8859-8// INTERNAL ISO8859-8 1 +module INTERNAL ISO-8859-8// ISO8859-8 1 # from to module cost alias ISO-IR-148// ISO-8859-9// @@ -139,8 +134,8 @@ alias ISO_8859-9:1989// ISO-8859-9// alias ISO_8859-9// ISO-8859-9// alias LATIN5// ISO-8859-9// alias L5// ISO-8859-9// -module ISO-8859-9// ISO-10646/UCS4/ ISO8859-9 1 -module ISO-10646/UCS4/ ISO-8859-9// ISO8859-9 1 +module 
ISO-8859-9// INTERNAL ISO8859-9 1 +module INTERNAL ISO-8859-9// ISO8859-9 1 # from to module cost alias ISO-IR-157// ISO-8859-10// @@ -148,96 +143,96 @@ alias ISO_8859-10:1993// ISO-8859-10// alias ISO_8859-10// ISO-8859-10// alias LATIN6// ISO-8859-10// alias L6// ISO-8859-10// -module ISO-8859-10// ISO-10646/UCS4/ ISO8859-10 1 -module ISO-10646/UCS4/ ISO-8859-10// ISO8859-10 1 +module ISO-8859-10// INTERNAL ISO8859-10 1 +module INTERNAL ISO-8859-10// ISO8859-10 1 # from to module cost alias T.61// T.61-8BIT// alias ISO-IR-103// T.61-8BIT// -module T.61-8BIT// ISO-10646/UCS4/ T.61 1 -module ISO-10646/UCS4/ T.61-8BIT// T.61 1 +module T.61-8BIT// INTERNAL T.61 1 +module INTERNAL T.61-8BIT// T.61 1 # from to module cost alias ISO-IR-156// ISO_6937// alias ISO_6937:1992// ISO_6937// alias ISO6937// ISO_6937// -module ISO_6937// ISO-10646/UCS4/ ISO_6937 1 -module ISO-10646/UCS4/ ISO_6937// ISO_6937 1 +module ISO_6937// INTERNAL ISO_6937 1 +module INTERNAL ISO_6937// ISO_6937 1 # from to module cost alias SHIFT-JIS// SJIS// -module SJIS// ISO-10646/UCS4/ SJIS 1 -module ISO-10646/UCS4/ SJIS// SJIS 1 +module SJIS// INTERNAL SJIS 1 +module INTERNAL SJIS// SJIS 1 # from to module cost -module KOI-8// ISO-10646/UCS4/ KOI-8 1 -module ISO-10646/UCS4/ KOI-8// KOI-8 1 +module KOI-8// INTERNAL KOI-8 1 +module INTERNAL KOI-8// KOI-8 1 # from to module cost -module KOI8-R// ISO-10646/UCS4/ KOI8-R 1 -module ISO-10646/UCS4/ KOI8-R// KOI8-R 1 +module KOI8-R// INTERNAL KOI8-R 1 +module INTERNAL KOI8-R// KOI8-R 1 # from to module cost alias ISO-IR-19// LATIN-GREEK// -module LATIN-GREEK// ISO-10646/UCS4/ LATIN-GREEK 1 -module ISO-10646/UCS4/ LATIN-GREEK// LATIN-GREEK 1 +module LATIN-GREEK// INTERNAL LATIN-GREEK 1 +module INTERNAL LATIN-GREEK// LATIN-GREEK 1 # from to module cost alias ISO-IR-27// LATIN-GREEK// -module LATIN-GREEK-1// ISO-10646/UCS4/ LATIN-GREEK-1 1 -module ISO-10646/UCS4/ LATIN-GREEK-1// LATIN-GREEK-1 1 +module LATIN-GREEK-1// INTERNAL LATIN-GREEK-1 1 +module INTERNAL 
LATIN-GREEK-1// LATIN-GREEK-1 1 # from to module cost alias ROMAN8// HP-ROMAN8// alias R8// HP-ROMAN8// -module HP-ROMAN8// ISO-10646/UCS4/ HP-ROMAN8 1 -module ISO-10646/UCS4/ HP-ROMAN8// HP-ROMAN8 1 +module HP-ROMAN8// INTERNAL HP-ROMAN8 1 +module INTERNAL HP-ROMAN8// HP-ROMAN8 1 # from to module cost -module EBCDIC-AT-DE// ISO-10646/UCS4/ EBCDIC-AT-DE 1 -module ISO-10646/UCS4/ EBCDIC-AT-DE// EBCDIC-AT-DE 1 +module EBCDIC-AT-DE// INTERNAL EBCDIC-AT-DE 1 +module INTERNAL EBCDIC-AT-DE// EBCDIC-AT-DE 1 # from to module cost -module EBCDIC-AT-DE-A// ISO-10646/UCS4/ EBCDIC-AT-DE-A 1 -module ISO-10646/UCS4/ EBCDIC-AT-DE-A// EBCDIC-AT-DE-A 1 +module EBCDIC-AT-DE-A// INTERNAL EBCDIC-AT-DE-A 1 +module INTERNAL EBCDIC-AT-DE-A// EBCDIC-AT-DE-A 1 # from to module cost -module EBCDIC-CA-FR// ISO-10646/UCS4/ EBCDIC-CA-FR 1 -module ISO-10646/UCS4/ EBCDIC-CA-FR// EBCDIC-CA-FR 1 +module EBCDIC-CA-FR// INTERNAL EBCDIC-CA-FR 1 +module INTERNAL EBCDIC-CA-FR// EBCDIC-CA-FR 1 # from to module cost alias EUCKR// EUC-KR// -module EUC-KR// ISO-10646/UCS4/ EUC-KR 1 -module ISO-10646/UCS4/ EUC-KR// EUC-KR 1 +module EUC-KR// INTERNAL EUC-KR 1 +module INTERNAL EUC-KR// EUC-KR 1 # from to module cost alias MSCP949// UHC// -module UHC// ISO-10646/UCS4/ UHC 1 -module ISO-10646/UCS4/ UHC// UHC 1 +module UHC// INTERNAL UHC 1 +module INTERNAL UHC// UHC 1 # from to module cost alias MSCP1361// JOHAB// -module JOHAB// ISO-10646/UCS4/ JOHAB 1 -module ISO-10646/UCS4/ JOHAB// JOHAB 1 +module JOHAB// INTERNAL JOHAB 1 +module INTERNAL JOHAB// JOHAB 1 # from to module cost alias BIG-FIVE// BIG5// alias BIGFIVE// BIG5// alias BIG-5// BIG5// -module BIG5// ISO-10646/UCS4/ BIG5 1 -module ISO-10646/UCS4/ BIG5// BIG5 1 +module BIG5// INTERNAL BIG5 1 +module INTERNAL BIG5// BIG5 1 # from to module cost alias EUCJP// EUC-JP// -module EUC-JP// ISO-10646/UCS4/ EUC-JP 1 -module ISO-10646/UCS4/ EUC-JP// EUC-JP 1 +module EUC-JP// INTERNAL EUC-JP 1 +module INTERNAL EUC-JP// EUC-JP 1 # from to module cost alias EUCCN// 
EUC-CN// -module EUC-CN// ISO-10646/UCS4/ EUC-CN 1 -module ISO-10646/UCS4/ EUC-CN// EUC-CN 1 +module EUC-CN// INTERNAL EUC-CN 1 +module INTERNAL EUC-CN// EUC-CN 1 # from to module cost alias EUCTW// EUC-TW// -module EUC-TW// ISO-10646/UCS4/ EUC-TW 1 -module ISO-10646/UCS4/ EUC-TW// EUC-TW 1 +module EUC-TW// INTERNAL EUC-TW 1 +module INTERNAL EUC-TW// EUC-TW 1 diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c index a9705f0..53ca76c 100644 --- a/iconvdata/iso646.c +++ b/iconvdata/iso646.c @@ -195,6 +195,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, } out_from: *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -243,6 +244,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, } out_to: *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars < data->outbufsize) diff --git a/iconvdata/iso6937.c b/iconvdata/iso6937.c index 30c3831..21e3ab4 100644 --- a/iconvdata/iso6937.c +++ b/iconvdata/iso6937.c @@ -492,6 +492,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -593,6 +594,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/iso8859-1.c b/iconvdata/iso8859-1.c index ab69c4d..b9484a0 100644 --- a/iconvdata/iso8859-1.c +++ b/iconvdata/iso8859-1.c @@ -104,6 +104,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -129,6 +130,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars < data->outbufsize) diff --git a/iconvdata/johab.c b/iconvdata/johab.c index 51b235c..c9912a7 100644 --- a/iconvdata/johab.c +++ 
b/iconvdata/johab.c @@ -408,6 +408,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -462,6 +463,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/run-iconv-test.sh b/iconvdata/run-iconv-test.sh index 2ef394d..ebafa2f 100755 --- a/iconvdata/run-iconv-test.sh +++ b/iconvdata/run-iconv-test.sh @@ -44,14 +44,18 @@ while read from to targets; do for t in $targets; do $ICONV -f $from -t $t testdata/$from > $temp1 || { echo "*** conversion from $from to $t failed"; exit 1; } + if test -s testdata/$from..$t; then + cmp $temp1 testdata/$from..$t >& /dev/null || + { echo "*** $from -> $t conversion failed"; exit 1; } + fi $ICONV -f $t -t $to -o $temp2 $temp1 || { echo "*** conversion from $t to $to failed"; exit 1; } test -s $temp1 && cmp testdata/$from $temp2 >& /dev/null || - { echo "*** $from -> $t -> $to conversion failed"; exit 1; } + { echo "*** $from -> t -> $to conversion failed"; exit 1; } # All tests ok. 
echo "$from -> $t -> $to ok" - #rm -f $name1 $name2 + rm -f $temp1 $temp2 done done < TESTS diff --git a/iconvdata/sjis.c b/iconvdata/sjis.c index 7a81c01..33dc2f1 100644 --- a/iconvdata/sjis.c +++ b/iconvdata/sjis.c @@ -4128,6 +4128,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -4181,6 +4182,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/t61.c b/iconvdata/t61.c index d532438..b77ee71 100644 --- a/iconvdata/t61.c +++ b/iconvdata/t61.c @@ -483,6 +483,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -542,6 +543,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/iconvdata/testdata/ISO-8859-1..UTF8 b/iconvdata/testdata/ISO-8859-1..UTF8 new file mode 100644 index 0000000..8ad5d2c --- /dev/null +++ b/iconvdata/testdata/ISO-8859-1..UTF8 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ® ¯ + ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ + À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï + Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß + à á â ã ä å æ ç è é ê ë ì í î ï + ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ diff --git a/iconvdata/testdata/ISO-8859-10 b/iconvdata/testdata/ISO-8859-10 new file mode 100644 index 0000000..7d3f9b2 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-10 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+ @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + + diff --git a/iconvdata/testdata/ISO-8859-10..UCS2 b/iconvdata/testdata/ISO-8859-10..UCS2 Binary files differnew file mode 100644 index 0000000..0764f06 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-10..UCS2 diff --git a/iconvdata/testdata/ISO-8859-2 b/iconvdata/testdata/ISO-8859-2 new file mode 100644 index 0000000..7d3f9b2 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-2 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + + diff --git a/iconvdata/testdata/ISO-8859-2..UCS4 b/iconvdata/testdata/ISO-8859-2..UCS4 Binary files differnew file mode 100644 index 0000000..1795522 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-2..UCS4 diff --git a/iconvdata/testdata/ISO-8859-2..UTF8 b/iconvdata/testdata/ISO-8859-2..UTF8 new file mode 100644 index 0000000..5428c1f --- /dev/null +++ b/iconvdata/testdata/ISO-8859-2..UTF8 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + Ą ˘ Ł ¤ Ľ Ś § ¨ Š Ş Ť Ź Ž Ż + ° ą ˛ ł ´ ľ ś ˇ ¸ š ş ť ź ˝ ž ż + Ŕ Á Â Ă Ä Ĺ Ć Ç Č É Ę Ë Ě Í Î Ď + Đ Ń Ň Ó Ô Ő Ö × Ř Ů Ú Ű Ü Ý Ţ ß + ŕ á â ă ä ĺ ć ç č é ę ë ě í î ď + đ ń ň ó ô ő ö ÷ ř ů ú ű ü ý ţ ˙ diff --git a/iconvdata/testdata/ISO-8859-3 b/iconvdata/testdata/ISO-8859-3 new file mode 100644 index 0000000..e85c3bd --- /dev/null +++ b/iconvdata/testdata/ISO-8859-3 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+ @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + + diff --git a/iconvdata/testdata/ISO-8859-4 b/iconvdata/testdata/ISO-8859-4 new file mode 100644 index 0000000..7d3f9b2 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-4 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + + diff --git a/iconvdata/testdata/ISO-8859-5 b/iconvdata/testdata/ISO-8859-5 new file mode 100644 index 0000000..7d3f9b2 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-5 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + + diff --git a/iconvdata/testdata/ISO-8859-6 b/iconvdata/testdata/ISO-8859-6 new file mode 100644 index 0000000..047664e --- /dev/null +++ b/iconvdata/testdata/ISO-8859-6 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + + diff --git a/iconvdata/testdata/ISO-8859-7 b/iconvdata/testdata/ISO-8859-7 new file mode 100644 index 0000000..c90d161 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-7 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + + diff --git a/iconvdata/testdata/ISO-8859-8 b/iconvdata/testdata/ISO-8859-8 new file mode 100644 index 0000000..42edc07 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-8 @@ -0,0 +1,11 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+ @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + diff --git a/iconvdata/testdata/ISO-8859-9 b/iconvdata/testdata/ISO-8859-9 new file mode 100644 index 0000000..7d3f9b2 --- /dev/null +++ b/iconvdata/testdata/ISO-8859-9 @@ -0,0 +1,12 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ + + + + + + diff --git a/iconvdata/uhc.c b/iconvdata/uhc.c index c1d5a40..ed4b7ad 100644 --- a/iconvdata/uhc.c +++ b/iconvdata/uhc.c @@ -2775,6 +2775,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, ++cnt; } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outwchars; } else @@ -2816,6 +2817,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, cnt += sizeof (wchar_t); } *inbufsize -= cnt; + inbuf += cnt; data->outbufavail = outchars; if (outchars + extra < data->outbufsize) diff --git a/stdlib/testmb.c b/stdlib/testmb.c index 117ade1..45dae7d 100644 --- a/stdlib/testmb.c +++ b/stdlib/testmb.c @@ -40,11 +40,7 @@ main (int argc, char *argv[]) int r; char c = 'x'; wchar_t wc; - char *mbc; - - mbc = (char *) malloc (MB_CUR_MAX); - mbc[0] = c; - mbc[1] = '\0'; + char mbc[MB_CUR_MAX]; if ((r = mbtowc (&wc, &c, MB_CUR_MAX)) <= 0) { diff --git a/sysdeps/i386/bits/byteswap.h b/sysdeps/i386/bits/byteswap.h index bf55c89..1eef351 100644 --- a/sysdeps/i386/bits/byteswap.h +++ b/sysdeps/i386/bits/byteswap.h @@ -1,5 +1,5 @@ /* Macros to swap the order of bytes in integer values. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -32,7 +32,7 @@ __v = __bswap_constant_16 (x); \ else \ __asm__ __volatile__ ("rorw $8, %w0" \ - : "=q" (__v) \ + : "=r" (__v) \ : "0" ((unsigned short int) (x)) \ : "cc"); \ __v; }) @@ -59,7 +59,7 @@ __asm__ __volatile__ ("rorw $8, %w0;" \ "rorl $16, %0;" \ "rorw $8, %w0" \ - : "=q" (__v) \ + : "=r" (__v) \ : "0" ((unsigned int) (x)) \ : "cc"); \ __v; }) diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c index cf854d9..c7e5651 100644 --- a/wcsmbs/wcsmbsload.c +++ b/wcsmbs/wcsmbsload.c @@ -38,8 +38,8 @@ static struct gconv_step to_wc = modname: NULL, counter: INT_MAX, from_name: "ANSI_X3.4-1968", - to_name: "ISO-10646/UCS4/", - fct: __gconv_transform_ascii_ucs4, + to_name: "#INTERNAL#", + fct: __gconv_transform_ascii_internal, init_fct: NULL, end_fct: NULL, data: NULL @@ -50,9 +50,9 @@ static struct gconv_step to_mb = shlib_handle: NULL, modname: NULL, counter: INT_MAX, - from_name: "ISO-10646/UCS4/", + from_name: "#INTERNAL#", to_name: "ANSI_X3.4-1968", - fct: __gconv_transform_ucs4_ascii, + fct: __gconv_transform_internal_ascii, init_fct: NULL, end_fct: NULL, data: NULL @@ -113,8 +113,8 @@ __wcsmbs_load_conv (const struct locale_data *new_category) /* Get name of charset of the locale. */ charset_name = new_category->values[_NL_ITEM_INDEX(CODESET)].string; - __wcsmbs_gconv_fcts.tomb = getfct (charset_name, "ISO-10646/UCS4/"); - __wcsmbs_gconv_fcts.towc = getfct ("ISO-10646/UCS4/", charset_name); + __wcsmbs_gconv_fcts.tomb = getfct (charset_name, "#INTERNAL#"); + __wcsmbs_gconv_fcts.towc = getfct ("#INTERNAL#", charset_name); /* If any of the conversion functions is not available we don't use any since this would mean we cannot convert back and |