aboutsummaryrefslogtreecommitdiff
path: root/iconv
diff options
context:
space:
mode:
Diffstat (limited to 'iconv')
-rw-r--r--iconv/gconv_builtin.h36
-rw-r--r--iconv/gconv_int.h13
-rw-r--r--iconv/gconv_simple.c268
-rw-r--r--iconv/iconv_prog.c15
4 files changed, 240 insertions, 92 deletions
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 9c98c35..265dca1 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -18,27 +18,41 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+BUILTIN_ALIAS ("UCS4//", "ISO-10646/UCS4/")
+BUILTIN_ALIAS ("UCS-4//", "ISO-10646/UCS4/")
+BUILTIN_ALIAS ("ISO-10646//", "ISO-10646/UCS4/")
+BUILTIN_ALIAS ("10646-1:1993//", "ISO-10646/UCS4/")
+BUILTIN_ALIAS ("10646-1:1993/UCS4/", "ISO-10646/UCS4/")
+
+BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
+ "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4",
+ __gconv_transform_internal_ucs4, NULL, NULL)
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
+ "INTERNAL", 1, "=ucs4->INTERNAL",
+ __gconv_transform_internal_ucs4, NULL, NULL)
+/* Please note that we need only one function for both direction. */
+
BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/")
BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/")
-BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
- "ISO-10646/UTF8/", 1, "=ucs4->utf8",
- __gconv_transform_ucs4_utf8, NULL, NULL)
+BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
+ "ISO-10646/UTF8/", 1, "=INTERNAL->utf8",
+ __gconv_transform_internal_utf8, NULL, NULL)
BUILTIN_TRANSFORMATION ("ISO-10646/UTF-?8/", "ISO-10646/UTF", 13,
- "ISO-10646/UCS4/", 1, "=utf8->ucs4",
- __gconv_transform_utf8_ucs4, NULL, NULL)
+ "INTERNAL", 1, "=utf8->INTERNAL",
+ __gconv_transform_utf8_internal, NULL, NULL)
BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/")
BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/")
-BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "ISO-10646/UCS4/",
- 1, "=ucs2->ucs4",
- __gconv_transform_ucs2_ucs4, NULL, NULL)
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "INTERNAL",
+ 1, "=ucs2->INTERNAL",
+ __gconv_transform_ucs2_internal, NULL, NULL)
-BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, "ISO-10646/UCS2/",
- 1, "=ucs4->ucs2",
- __gconv_transform_ucs4_ucs2, NULL, NULL)
+BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "ISO-10646/UCS2/",
+ 1, "=INTERNAL->ucs2",
+ __gconv_transform_internal_ucs2, NULL, NULL)
BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy",
__gconv_transform_dummy, NULL, NULL)
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 35ec31a..a1475f8 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -153,12 +153,13 @@ extern void __gconv_get_builtin_trans (const char *__name,
int __do_flush)
__BUILTIN_TRANS (__gconv_transform_dummy);
-__BUILTIN_TRANS (__gconv_transform_ascii_ucs4);
-__BUILTIN_TRANS (__gconv_transform_ucs4_ascii);
-__BUILTIN_TRANS (__gconv_transform_ucs4_utf8);
-__BUILTIN_TRANS (__gconv_transform_utf8_ucs4);
-__BUILTIN_TRANS (__gconv_transform_ucs2_ucs4);
-__BUILTIN_TRANS (__gconv_transform_ucs4_ucs2);
+__BUILTIN_TRANS (__gconv_transform_ascii_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_ascii);
+__BUILTIN_TRANS (__gconv_transform_utf8_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_utf8);
+__BUILTIN_TRANS (__gconv_transform_ucs2_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_ucs2);
+__BUILTIN_TRANS (__gconv_transform_internal_ucs4);
# undef __BUITLIN_TRANS
#endif
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 38b6b56..b72e61e 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -18,6 +18,8 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <byteswap.h>
+#include <endian.h>
#include <errno.h>
#include <gconv.h>
#include <stdint.h>
@@ -76,16 +78,21 @@ __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
}
-/* Convert from ISO 646-IRV to ISO 10646/UCS4. */
+/* Transform from the internal, UCS4-like format, to UCS4. The
+ difference between the internal ucs4 format and the real UCS4
+ format is, if any, the endianess. The Unicode/ISO 10646 says that
+ unless some higher protocol specifies it differently, the byte
+ order is big endian.*/
int
-__gconv_transform_ascii_ucs4 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_internal_ucs4 (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
gconv_fct fct = next_step->fct;
- size_t do_write;
+ size_t do_write = 0;
int result;
/* If the function is called with no input this means we have to reset
@@ -95,7 +102,6 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step,
{
/* Clear the state. */
memset (data->statep, '\0', sizeof (mbstate_t));
- do_write = 0;
/* Call the steps down the chain if there are any. */
if (data->is_last)
@@ -114,12 +120,126 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step,
else
{
int save_errno = errno;
- do_write = 0;
result = GCONV_OK;
do
{
- const unsigned char *newinbuf = inbuf;
+ size_t n_convert = (MIN (*inlen,
+ (data->outbufsize - data->outbufavail))
+ / sizeof (wchar_t));
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ /* Sigh, we have to do some real work. */
+ wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
+ size_t cnt;
+
+ for (cnt = 0; cnt < n_convert; ++cnt)
+ outbuf[cnt] = bswap_32 (((wchar_t *) inbuf)[cnt]);
+
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ /* Simply copy the data. */
+ memcpy (&data->outbuf[data->outbufsize], inbuf,
+ n_convert * sizeof (wchar_t));
+#else
+# error "This endianess is not supported."
+#endif
+
+ *inlen -= n_convert * sizeof (wchar_t);
+ inbuf += n_convert * sizeof (wchar_t);
+ data->outbufavail += n_convert * sizeof (wchar_t);
+ do_write += n_convert;
+
+ if (*inlen > 0 && *inlen < sizeof (wchar_t))
+ {
+ /* We have an incomplete character at the end. */
+ result = GCONV_INCOMPLETE_INPUT;
+ break;
+ }
+
+ if (data->is_last)
+ {
+ /* This is the last step. */
+ result = (*inlen < sizeof (wchar_t)
+ ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
+ break;
+ }
+
+ /* Status so far. */
+ result = GCONV_EMPTY_INPUT;
+
+ if (data->outbufavail > 0)
+ {
+ /* Call the functions below in the chain. */
+ size_t newavail = data->outbufavail;
+
+ result = (*fct) (next_step, next_data, data->outbuf, &newavail,
+ written, 0);
+
+ /* Correct the output buffer. */
+ if (newavail != data->outbufavail && newavail > 0)
+ {
+ memmove (data->outbuf,
+ &data->outbuf[data->outbufavail - newavail],
+ newavail);
+ data->outbufavail = newavail;
+ }
+ }
+ }
+ while (*inlen >= sizeof (wchar_t) && result == GCONV_EMPTY_INPUT);
+
+ __set_errno (save_errno);
+ }
+
+ if (written != NULL && data->is_last)
+ *written = do_write;
+
+ return result;
+}
+
+
+/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
+int
+__gconv_transform_ascii_internal (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
+{
+ struct gconv_step *next_step = step + 1;
+ struct gconv_step_data *next_data = data + 1;
+ gconv_fct fct = next_step->fct;
+ size_t do_write = 0;
+ int result;
+
+ /* If the function is called with no input this means we have to reset
+ to the initial state. The possibly partly converted input is
+ dropped. */
+ if (do_flush)
+ {
+ /* Clear the state. */
+ memset (data->statep, '\0', sizeof (mbstate_t));
+
+ /* Call the steps down the chain if there are any. */
+ if (data->is_last)
+ result = GCONV_OK;
+ else
+ {
+ struct gconv_step *next_step = step + 1;
+ struct gconv_step_data *next_data = data + 1;
+
+ result = (*fct) (next_step, next_data, NULL, 0, written, 1);
+
+ /* Clear output buffer. */
+ data->outbufavail = 0;
+ }
+ }
+ else
+ {
+ const unsigned char *newinbuf = inbuf;
+ int save_errno = errno;
+
+ result = GCONV_OK;
+ do
+ {
size_t actually = 0;
size_t cnt = 0;
@@ -193,9 +313,10 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step,
/* Convert from ISO 10646/UCS to ISO 646-IRV. */
int
-__gconv_transform_ucs4_ascii (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_internal_ascii (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -228,13 +349,13 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step,
}
else
{
+ const wchar_t *newinbuf = (const wchar_t *) inbuf;
int save_errno = errno;
do_write = 0;
result = GCONV_OK;
do
{
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
size_t actually = 0;
size_t cnt = 0;
@@ -264,11 +385,18 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step,
if (result != GCONV_OK)
break;
+ /* Check for incomplete input. */
+ if (*inlen > 0 && *inlen < sizeof (wchar_t))
+ {
+ /* We have an incomplete character at the end. */
+ result = GCONV_INCOMPLETE_INPUT;
+ break;
+ }
+
if (data->is_last)
{
/* This is the last step. */
- result = (*inlen < sizeof (wchar_t)
- ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
+ result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
break;
}
@@ -306,9 +434,10 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step,
int
-__gconv_transform_ucs4_utf8 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_internal_utf8 (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -341,13 +470,13 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step,
}
else
{
+ const wchar_t *newinbuf = (const wchar_t *) inbuf;
int save_errno = errno;
do_write = 0;
result = GCONV_OK;
do
{
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
size_t cnt = 0;
while (data->outbufavail < data->outbufsize
@@ -397,16 +526,24 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step,
/* Remember how much we converted. */
do_write += cnt;
*inlen -= cnt * sizeof (wchar_t);
+ newinbuf += cnt;
/* Check whether an illegal character appeared. */
if (result != GCONV_OK)
break;
+ /* Check for incomplete input. */
+ if (*inlen > 0 && *inlen < sizeof (wchar_t))
+ {
+ /* We have an incomplete character at the end. */
+ result = GCONV_INCOMPLETE_INPUT;
+ break;
+ }
+
if (data->is_last)
{
/* This is the last step. */
- result = (*inlen < sizeof (wchar_t)
- ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
+ result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
break;
}
@@ -444,9 +581,10 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step,
int
-__gconv_transform_utf8_ucs4 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_utf8_internal (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -578,6 +716,7 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
/* Remember how much we converted. */
do_write += actually;
*inlen -= cnt;
+ inbuf += cnt;
data->outbufavail += actually * sizeof (wchar_t);
@@ -588,7 +727,7 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
break;
}
- if (*inlen < extra)
+ if (*inlen > 0 && *inlen < extra)
{
/* We have an incomplete character at the end. */
result = GCONV_INCOMPLETE_INPUT;
@@ -637,9 +776,10 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
int
-__gconv_transform_ucs2_ucs4 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_ucs2_internal (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -669,12 +809,12 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step,
}
else
{
+ const uint16_t *newinbuf = (const uint16_t *) inbuf;
int save_errno = errno;
do_write = 0;
do
{
- const uint16_t *newinbuf = (const uint16_t *) inbuf;
wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
size_t actually = 0;
@@ -683,34 +823,29 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step,
while (data->outbufavail + 4 <= data->outbufsize
&& *inlen >= 2)
{
- outbuf[actually++] = *newinbuf++;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ outbuf[actually++] = (wchar_t) bswap_16 (*newinbuf++);
+#else
+ outbuf[actually++] = (wchar_t) *newinbuf++;
+#endif
data->outbufavail += 4;
*inlen -= 2;
}
- if (*inlen != 1)
- {
- /* We have an incomplete input character. */
- mbstate_t *state = data->statep;
- state->count = 1;
- state->value = *(uint8_t *) newinbuf;
- --*inlen;
- }
-
/* Remember how much we converted. */
do_write += actually * sizeof (wchar_t);
- /* Check whether an illegal character appeared. */
- if (errno != 0)
+ if (*inlen == 1)
{
- result = GCONV_ILLEGAL_INPUT;
+ /* We have an incomplete character at the end. */
+ result = GCONV_INCOMPLETE_INPUT;
break;
}
- if (*inlen == 0 && !__mbsinit (data->statep))
+ /* Check whether an illegal character appeared. */
+ if (errno != 0)
{
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
+ result = GCONV_ILLEGAL_INPUT;
break;
}
@@ -756,9 +891,10 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step,
int
-__gconv_transform_ucs4_ucs2 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_internal_ucs2 (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -791,12 +927,12 @@ __gconv_transform_ucs4_ucs2 (struct gconv_step *step,
}
else
{
+ const wchar_t *newinbuf = (const wchar_t *) inbuf;
int save_errno = errno;
do_write = 0;
do
{
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail];
size_t actually = 0;
@@ -810,39 +946,33 @@ __gconv_transform_ucs4_ucs2 (struct gconv_step *step,
__set_errno (EILSEQ);
break;
}
- outbuf[actually++] = (wchar_t) *newinbuf;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ /* Please note that we use the `uint32_t' pointer as a
+ `uint16_t' pointer which works since we are on a
+ little endian machine. */
+ outbuf[actually++] = bswap_16 (*((uint16_t *) newinbuf));
+ ++newinbuf;
+#else
+ outbuf[actually++] = *newinbuf++;
+#endif
*inlen -= 4;
data->outbufavail += 2;
}
- if (*inlen < 4)
- {
- /* We have an incomplete input character. */
- mbstate_t *state = data->statep;
- state->count = *inlen;
- state->value = 0;
- while (*inlen > 0)
- {
- state->value <<= 8;
- state->value += *(uint8_t *) newinbuf;
- --*inlen;
- }
- }
-
/* Remember how much we converted. */
do_write += (const char *) newinbuf - inbuf;
- /* Check whether an illegal character appeared. */
- if (errno != 0)
+ if (*inlen > 0 && *inlen < 4)
{
- result = GCONV_ILLEGAL_INPUT;
+ /* We have an incomplete input character. */
+ result = GCONV_INCOMPLETE_INPUT;
break;
}
- if (*inlen == 0 && !__mbsinit (data->statep))
+ /* Check whether an illegal character appeared. */
+ if (errno != 0)
{
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
+ result = GCONV_ILLEGAL_INPUT;
break;
}
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 0c1b9d0..569bd3b 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -509,14 +509,17 @@ print_known_names (void)
{
if (__gconv_modules_db[cnt]->from_pattern == NULL)
{
- tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist,
- (__compar_fn_t) strcoll);
- tsearch (__gconv_modules_db[cnt]->to_string, &printlist,
- (__compar_fn_t) strcoll);
+ if (strcmp (__gconv_modules_db[cnt]->from_constpfx, "INTERNAL"))
+ tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist,
+ (__compar_fn_t) strcoll);
+ if (strcmp (__gconv_modules_db[cnt]->to_string, "INTERNAL"))
+ tsearch (__gconv_modules_db[cnt]->to_string, &printlist,
+ (__compar_fn_t) strcoll);
}
else
- tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist,
- (__compar_fn_t) strcoll);
+ if (strcmp (__gconv_modules_db[cnt]->from_pattern, "INTERNAL"))
+ tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist,
+ (__compar_fn_t) strcoll);
}
fputs (_("\