aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1998-04-01 17:44:34 +0000
committerUlrich Drepper <drepper@redhat.com>1998-04-01 17:44:34 +0000
commitd2374599d441d86cbf4dab69b69d7563c1fcaaa0 (patch)
tree4fdb62c712cf6f071567810400fe1348dc5ecf1b
parent8fe0fd03e5fb2b5ce401fc313e714af874cf01b5 (diff)
downloadglibc-d2374599d441d86cbf4dab69b69d7563c1fcaaa0.zip
glibc-d2374599d441d86cbf4dab69b69d7563c1fcaaa0.tar.gz
glibc-d2374599d441d86cbf4dab69b69d7563c1fcaaa0.tar.bz2
Update.
1998-04-01 17:38 Ulrich Drepper <drepper@cygnus.com> * iconv/gconv_simple.c: New builtins for UCS en/decoding. * iconv/gconv_builtin.h: Add definitions for new builtins. * iconv/gconv.h: Add prototypes for new builtins. * iconvdata/Makefile (modules): Add ISO646. Add rules for ISO646 module. (distribute): Add iso646.c. * iconvdata/gconv-modules: Add module and alias definition for ISO646 charsets. * iconvdata/iso646.c: New file.
-rw-r--r--ChangeLog13
-rw-r--r--iconv/gconv.h4
-rw-r--r--iconv/gconv_builtin.h23
-rw-r--r--iconv/gconv_simple.c251
-rw-r--r--iconvdata/Makefile6
-rw-r--r--iconvdata/gconv-modules20
-rw-r--r--iconvdata/iso646.c263
7 files changed, 573 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index 3cf2563..052f2e9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+1998-04-01 17:38 Ulrich Drepper <drepper@cygnus.com>
+
+ * iconv/gconv_simple.c: New builtins for UCS en/decoding.
+ * iconv/gconv_builtin.h: Add definitions for new builtins.
+ * iconv/gconv.h: Add prototypes for new builtins.
+
+ * iconvdata/Makefile (modules): Add ISO646.
+ Add rules for ISO646 module.
+ (distribute): Add iso646.c.
+ * iconvdata/gconv-modules: Add module and alias definition for
+ ISO646 charsets.
+ * iconvdata/iso646.c: New file.
+
1998-04-01 16:10 Ulrich Drepper <drepper@cygnus.com>
* libc.map: Add __gconv_alias_db, __gconv_nmodules, __gconv_modules_db.
diff --git a/iconv/gconv.h b/iconv/gconv.h
index 83f7a99..139ca0d 100644
--- a/iconv/gconv.h
+++ b/iconv/gconv.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -200,6 +200,8 @@ extern void __gconv_get_builtin_trans __P ((__const char *__name,
__BUILTIN_TRANS (__gconv_transform_dummy);
__BUILTIN_TRANS (__gconv_transform_ucs4_utf8);
__BUILTIN_TRANS (__gconv_transform_utf8_ucs4);
+__BUILTIN_TRANS (__gconv_transform_ucs2_ucs4);
+__BUILTIN_TRANS (__gconv_transform_ucs4_ucs2);
# undef __BUITLIN_TRANS
extern int __gconv_transform_init_rstate __P ((struct gconv_step *__step,
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 3990b2c..8e5d692 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -21,17 +21,32 @@
BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/")
BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/")
-BUILTIN_TRANSFORMATION ("([^/]+)/UCS4/([^/]*)", NULL, 0,
- "\\1/UTF8/\\2", 1, "=ucs4->utf8",
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
+ "ISO-10646/UTF8/", 1, "=ucs4->utf8",
__gconv_transform_ucs4_utf8,
__gconv_transform_init_rstate,
__gconv_transform_end_rstate)
-BUILTIN_TRANSFORMATION ("([^/]+)/UTF-?8/([^/]*)", NULL, 0,
- "\\1/UCS4/\\2", 1, "=utf8->ucs4",
+BUILTIN_TRANSFORMATION ("ISO-10646/UTF-?8/", "ISO-10646/UTF", 13,
+ "ISO-10646/UCS4/", 1, "=utf8->ucs4",
__gconv_transform_utf8_ucs4,
__gconv_transform_init_rstate,
__gconv_transform_end_rstate)
+BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/")
+BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/")
+
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "ISO-10646/UCS4/",
+ 1, "=ucs2->ucs4",
+ __gconv_transform_ucs2_ucs4,
+ __gconv_transform_init_rstate,
+ __gconv_transform_end_rstate)
+
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, "ISO-10646/UCS2/",
+ 1, "=ucs4->ucs2",
+ __gconv_transform_ucs4_ucs2,
+ __gconv_transform_init_rstate,
+ __gconv_transform_end_rstate)
+
BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy",
__gconv_transform_dummy, NULL, NULL)
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 6f399cc..280ecf5 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -20,6 +20,7 @@
#include <errno.h>
#include <gconv.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
@@ -289,3 +290,253 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
return result;
}
+
+
+/* Convert UCS2 input to UCS4.
+
+   Each 16-bit unit of INBUF is read in host byte order and stored as one
+   wchar_t in DATA's output buffer.  *INLEN holds the number of input bytes
+   on entry and is decremented by the number of bytes consumed.  When this
+   is not the last step the converted data is handed to the next step in
+   the chain.  A single trailing byte is stashed in the mbstate_t kept in
+   DATA->data and reported as GCONV_INCOMPLETE_INPUT.
+
+   If DO_FLUSH is nonzero no input is converted: the state is reset and the
+   flush request is passed down the chain.
+
+   Returns one of the GCONV_* status codes.  If WRITTEN is not NULL and this
+   is the last step, *WRITTEN receives the number of bytes stored in the
+   output buffer.
+   NOTE(review): the sibling converters report *WRITTEN in characters, not
+   bytes -- confirm which unit callers expect.  */
+int
+__gconv_transform_ucs2_ucs4 (struct gconv_step *step,
+                             struct gconv_step_data *data, const char *inbuf,
+                             size_t *inlen, size_t *written, int do_flush)
+{
+  size_t do_write = 0;
+  int result;
+
+  /* If the function is called with no input this means we have to reset
+     to the initial state.  The possibly partly converted input is
+     dropped.  */
+  if (do_flush)
+    {
+      /* Clear the state.  */
+      memset (data->data, '\0', sizeof (mbstate_t));
+
+      /* Call the steps down the chain if there are any.  The next step is
+         only looked at when one exists.  */
+      if (data->is_last)
+        result = GCONV_OK;
+      else
+        {
+          struct gconv_step *next_step = step + 1;
+          struct gconv_step_data *next_data = data + 1;
+
+          result = (*next_step->fct) (next_step, next_data, NULL, NULL,
+                                      written, 1);
+
+          /* Clear output buffer, as the other converters do on flush.  */
+          data->outbufavail = 0;
+        }
+    }
+  else
+    {
+      /* Steps further down may change errno; the caller's value is
+         restored before returning.  */
+      int save_errno = errno;
+
+      do
+        {
+          const uint16_t *newinbuf = (const uint16_t *) inbuf;
+          wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
+          size_t actually = 0;
+
+          while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize
+                 && *inlen >= sizeof (uint16_t))
+            {
+              outbuf[actually++] = *newinbuf++;
+              data->outbufavail += sizeof (wchar_t);
+              *inlen -= sizeof (uint16_t);
+            }
+
+          /* Continue behind the consumed input in the next round instead
+             of converting the same characters again.  */
+          inbuf = (const char *) newinbuf;
+
+          if (*inlen == 1)
+            {
+              /* We have an incomplete input character.  */
+              mbstate_t *state = (mbstate_t *) data->data;
+              state->count = 1;
+              state->value = *(const uint8_t *) inbuf;
+              --*inlen;
+            }
+
+          /* Remember how much we converted.  */
+          do_write += actually * sizeof (wchar_t);
+
+          if (*inlen == 0 && !mbsinit ((mbstate_t *) data->data))
+            {
+              /* We have an incomplete character at the end.  */
+              result = GCONV_INCOMPLETE_INPUT;
+              break;
+            }
+
+          if (data->is_last)
+            {
+              /* This is the last step.  Input is only left over when the
+                 output buffer has no room for another character.  */
+              result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
+              break;
+            }
+
+          /* Status so far.  */
+          result = GCONV_EMPTY_INPUT;
+
+          if (data->outbufavail > 0)
+            {
+              /* Call the functions below in the chain.  */
+              struct gconv_step *next_step = step + 1;
+              struct gconv_step_data *next_data = data + 1;
+              size_t newavail = data->outbufavail;
+
+              result = (*next_step->fct) (next_step, next_data, data->outbuf,
+                                          &newavail, written, 0);
+
+              /* Correct the output buffer.  This must also happen when
+                 everything was consumed; otherwise the buffer would still
+                 look full and the same data would be passed on again.  */
+              if (newavail != data->outbufavail)
+                {
+                  if (newavail > 0)
+                    memmove (data->outbuf,
+                             &data->outbuf[data->outbufavail - newavail],
+                             newavail);
+                  data->outbufavail = newavail;
+                }
+            }
+        }
+      while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
+
+      __set_errno (save_errno);
+    }
+
+  if (written != NULL && data->is_last)
+    *written = do_write;
+
+  return result;
+}
+
+
+/* Convert UCS4 input to UCS2.
+
+   Each wchar_t of INBUF is stored as one 16-bit unit (host byte order) in
+   DATA's output buffer.  *INLEN holds the number of input bytes on entry
+   and is decremented by the number of bytes consumed.  A character that
+   does not fit into 16 bits stops the conversion with
+   GCONV_ILLEGAL_INPUT.  Fewer than four trailing bytes are stashed in the
+   mbstate_t kept in DATA->data and reported as GCONV_INCOMPLETE_INPUT.
+   When this is not the last step the converted data is handed to the next
+   step in the chain.
+
+   If DO_FLUSH is nonzero no input is converted: the state is reset and the
+   flush request is passed down the chain.
+
+   Returns one of the GCONV_* status codes.  If WRITTEN is not NULL and this
+   is the last step, *WRITTEN receives the number of characters
+   converted.  */
+int
+__gconv_transform_ucs4_ucs2 (struct gconv_step *step,
+                             struct gconv_step_data *data, const char *inbuf,
+                             size_t *inlen, size_t *written, int do_flush)
+{
+  size_t do_write = 0;
+  int result;
+
+  /* If the function is called with no input this means we have to reset
+     to the initial state.  The possibly partly converted input is
+     dropped.  */
+  if (do_flush)
+    {
+      /* Clear the state.  */
+      memset (data->data, '\0', sizeof (mbstate_t));
+
+      /* Call the steps down the chain if there are any.  The next step is
+         only looked at when one exists.  */
+      if (data->is_last)
+        result = GCONV_OK;
+      else
+        {
+          struct gconv_step *next_step = step + 1;
+          struct gconv_step_data *next_data = data + 1;
+
+          result = (*next_step->fct) (next_step, next_data, NULL, NULL,
+                                      written, 1);
+
+          /* Clear output buffer.  */
+          data->outbufavail = 0;
+        }
+    }
+  else
+    {
+      int save_errno = errno;
+
+      do
+        {
+          const wchar_t *newinbuf = (const wchar_t *) inbuf;
+          uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail];
+          size_t actually = 0;
+
+          /* errno is used to signal an unrepresentable character out of
+             the conversion loop.  */
+          errno = 0;
+
+          while (data->outbufavail + sizeof (uint16_t) <= data->outbufsize
+                 && *inlen >= sizeof (wchar_t))
+            {
+              /* Compare unsigned so that negative values are rejected
+                 as well instead of being truncated.  */
+              if ((uint32_t) *newinbuf >= 0x10000)
+                {
+                  __set_errno (EILSEQ);
+                  break;
+                }
+              /* Advance the input; otherwise the first character would
+                 be converted over and over.  */
+              outbuf[actually++] = (uint16_t) *newinbuf++;
+              *inlen -= sizeof (wchar_t);
+              data->outbufavail += sizeof (uint16_t);
+            }
+
+          if (*inlen < sizeof (wchar_t))
+            {
+              /* We have an incomplete input character (or none at all, in
+                 which case the state ends up cleared).  Collect the
+                 remaining bytes one after the other.  */
+              mbstate_t *state = (mbstate_t *) data->data;
+              const uint8_t *rest = (const uint8_t *) newinbuf;
+
+              state->count = *inlen;
+              state->value = 0;
+              while (*inlen > 0)
+                {
+                  state->value <<= 8;
+                  state->value += *rest++;
+                  --*inlen;
+                }
+            }
+
+          /* Remember how much we converted (in input bytes) and continue
+             behind it in the next round.  */
+          do_write += (const char *) newinbuf - inbuf;
+          inbuf = (const char *) newinbuf;
+
+          /* Check whether an illegal character appeared.  */
+          if (errno != 0)
+            {
+              result = GCONV_ILLEGAL_INPUT;
+              break;
+            }
+
+          if (*inlen == 0 && !mbsinit ((mbstate_t *) data->data))
+            {
+              /* We have an incomplete character at the end.  */
+              result = GCONV_INCOMPLETE_INPUT;
+              break;
+            }
+
+          if (data->is_last)
+            {
+              /* This is the last step.  */
+              result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
+              break;
+            }
+
+          /* Status so far.  */
+          result = GCONV_EMPTY_INPUT;
+
+          if (data->outbufavail > 0)
+            {
+              /* Call the functions below in the chain.  */
+              struct gconv_step *next_step = step + 1;
+              struct gconv_step_data *next_data = data + 1;
+              size_t newavail = data->outbufavail;
+
+              result = (*next_step->fct) (next_step, next_data, data->outbuf,
+                                          &newavail, written, 0);
+
+              /* Correct the output buffer.  This must also happen when
+                 everything was consumed; otherwise the buffer would still
+                 look full and the same data would be passed on again.  */
+              if (newavail != data->outbufavail)
+                {
+                  if (newavail > 0)
+                    memmove (data->outbuf,
+                             &data->outbuf[data->outbufavail - newavail],
+                             newavail);
+                  data->outbufavail = newavail;
+                }
+            }
+        }
+      while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
+
+      __set_errno (save_errno);
+    }
+
+  /* DO_WRITE counts consumed input bytes; report characters.  */
+  if (written != NULL && data->is_last)
+    *written = do_write / sizeof (wchar_t);
+
+  return result;
+}
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
index 14aeaae..bf9a256 100644
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@@ -26,7 +26,7 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \
ISO8859-6 ISO8859-7 ISO8859-8 ISO8859-9 ISO8859-10 \
T.61 ISO_6937 SJIS KOI-8 KOI8-R LATIN-GREEK LATIN-GREEK-1 \
HP-ROMAN8 EBCDIC-AT-DE EBCDIC-AT-DE-A EBCDIC-CA-FR \
- EUC-KR UHC JOHAB libJIS libKSC
+ EUC-KR UHC JOHAB libJIS libKSC ISO646
modules.so := $(addsuffix .so, $(modules))
@@ -42,6 +42,7 @@ ISO8859-7-routines := iso8859-7
ISO8859-8-routines := iso8859-8
ISO8859-9-routines := iso8859-9
ISO8859-10-routines := iso8859-10
+ISO646-routines := iso646
T.61-routines := t61
ISO_6937-routines := iso6937
SJIS-routines := sjis
@@ -74,7 +75,8 @@ distribute := 8bit-generic.c 8bit-gap.c gap.pl gaptab.pl gconv-modules \
koi-8.c koi8-r.c koi8-r.h hp-roman8.c latin-greek.c \
latin-greek.h latin-greek-1.c latin-greek-1.h ebcdic-at-de.c \
ebcdic-at-de-a.c ebcdic-ca-fr.c jis0201.c jis0208.c jis0212.c \
- extra-module.mk euckr.c johab.c uhc.c ksc5601.c ksc5601.h
+ extra-module.mk euckr.c johab.c uhc.c ksc5601.c ksc5601.h \
+ iso646.c
# We build the transformation modules only when we build shared libs.
ifeq (yes,$(build-shared))
diff --git a/iconvdata/gconv-modules b/iconvdata/gconv-modules
index 7bca452..adbf2f0 100644
--- a/iconvdata/gconv-modules
+++ b/iconvdata/gconv-modules
@@ -39,6 +39,26 @@ alias 10646-1:1993// ISO-10646/UCS4/
alias 10646-1:1993/UCS4/ ISO-10646/UCS4/
# from to module cost
+alias ISO-IR-6// ANSI_X3.4-1968//
+alias ANSI_X3.4-1986// ANSI_X3.4-1968//
+alias ISO_646.IRV:1991// ANSI_X3.4-1968//
+alias ASCII// ANSI_X3.4-1968//
+alias ISO646-US// ANSI_X3.4-1968//
+alias US-ASCII// ANSI_X3.4-1968//
+alias US// ANSI_X3.4-1968//
+alias IBM367// ANSI_X3.4-1968//
+alias CP367// ANSI_X3.4-1968//
+module ANSI_X3.4-1968// ISO-10646/UCS4/ ISO646 1
+module ISO-10646/UCS4/ ANSI_X3.4-1968// ISO646 1
+
+alias ISO-IR-4// BS_4730//
+alias ISO646-GB// BS_4730//
+alias GB// BS_4730//
+alias UK// BS_4730//
+module BS_4730// ISO-10646/UCS4/ ISO646
+module ISO-10646/UCS4/ BS_4730// ISO646
+
+# from to module cost
alias ISO-IR-100// ISO-8859-1//
alias ISO_8859-1:1987// ISO-8859-1//
alias ISO_8859-1// ISO-8859-1//
diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c
new file mode 100644
index 0000000..db0c089
--- /dev/null
+++ b/iconvdata/iso646.c
@@ -0,0 +1,263 @@
+/* Conversion to and from the various ISO 646 CCS.
+ Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <gconv.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Direction of the transformation, as determined by gconv_init from the step's charset names. */
+enum direction
+{
+ illegal, /* Neither the source nor the target name is a known ISO 646 variant. */
+ to_iso646, /* The target name of the step is an ISO 646 variant. */
+ from_iso646 /* The source name of the step is an ISO 646 variant. */
+};
+
+enum variant /* The national ISO 646 variants this module knows. */
+{
+ US, /* ANSI_X3.4-1968 */
+ GB, /* BS_4730 */
+};
+
+struct iso646_data /* Per-step data: allocated in gconv_init, read in gconv, freed in gconv_end. */
+{
+ enum direction dir; /* Which way this step converts. */
+ enum variant var; /* Which variant is on the ISO 646 side. */
+};
+
+
+/* Set up the private data of a conversion step handled by this module.
+
+   The charset names of STEP are searched (ignoring case) for one of the
+   supported ISO 646 variant names; the source name is examined first, the
+   target name second.  On a match a struct iso646_data recording the
+   direction and the variant is allocated and stored in DATA->data.
+
+   Returns GCONV_OK on success.  GCONV_NOCONV is returned if no name
+   matches and also if the allocation fails.  */
+int
+gconv_init (struct gconv_step *step, struct gconv_step_data *data)
+{
+  /* Known variant names, in the order in which they are tried.  */
+  static const struct
+  {
+    const char *name;
+    enum variant var;
+  } charsets[] =
+  {
+    { "ANSI_X3.4-1968", US },
+    { "BS_4730", GB }
+  };
+  const size_t ncharsets = sizeof (charsets) / sizeof (charsets[0]);
+  enum direction dir = illegal;
+  enum variant var = US;
+  struct iso646_data *new_data;
+  size_t idx;
+
+  /* Is the source one of our charsets?  */
+  for (idx = 0; dir == illegal && idx < ncharsets; ++idx)
+    if (strcasestr (step->from_name, charsets[idx].name) != NULL)
+      {
+        dir = from_iso646;
+        var = charsets[idx].var;
+      }
+
+  /* If not, maybe the target is.  */
+  for (idx = 0; dir == illegal && idx < ncharsets; ++idx)
+    if (strcasestr (step->to_name, charsets[idx].name) != NULL)
+      {
+        dir = to_iso646;
+        var = charsets[idx].var;
+      }
+
+  if (dir == illegal)
+    return GCONV_NOCONV;
+
+  new_data = malloc (sizeof *new_data);
+  if (new_data == NULL)
+    return GCONV_NOCONV;
+
+  new_data->dir = dir;
+  new_data->var = var;
+  data->data = new_data;
+
+  return GCONV_OK;
+}
+
+
+/* Release the private data which gconv_init attached to DATA.  */
+void
+gconv_end (struct gconv_step_data *data)
+{
+  void *private_data = data->data;
+
+  free (private_data);
+}
+
+
+/* Map the ISO 646 byte BYTE of variant VAR to UCS4.  Returns nonzero and
+   stores the character in *UCS on success, zero if BYTE is not a 7-bit
+   value.  */
+static int
+iso646_to_ucs4 (enum variant var, unsigned char byte, wchar_t *ucs)
+{
+  if (byte > 0x7f)
+    return 0;
+
+  if (var == GB && byte == 0x23)
+    *ucs = 0xa3;        /* POUND SIGN replaces NUMBER SIGN.  */
+  else if (var == GB && byte == 0x7e)
+    *ucs = 0x203e;      /* OVERLINE replaces TILDE.  */
+  else
+    *ucs = byte;
+
+  return 1;
+}
+
+
+/* Map the UCS4 character UCS to the ISO 646 variant VAR.  Returns nonzero
+   and stores the byte in *BYTE on success, zero if the variant has no such
+   character.  */
+static int
+ucs4_to_iso646 (enum variant var, wchar_t ucs, char *byte)
+{
+  if (var == GB)
+    {
+      if (ucs == 0xa3)
+        {
+          *byte = 0x23;
+          return 1;
+        }
+      if (ucs == 0x203e)
+        {
+          *byte = 0x7e;
+          return 1;
+        }
+      /* These two positions are taken by the characters above.  */
+      if (ucs == 0x23 || ucs == 0x7e)
+        return 0;
+    }
+
+  if (ucs >= L'\0' && ucs <= L'\177')
+    {
+      *byte = (char) ucs;
+      return 1;
+    }
+
+  return 0;
+}
+
+
+/* Convert between an ISO 646 variant and UCS4.
+
+   Direction and variant are taken from the struct iso646_data which
+   gconv_init stored in DATA->data.  *INBUFSIZE holds the number of input
+   bytes on entry and is decremented by the number of bytes consumed.  The
+   result is stored in DATA's output buffer and, when this is not the last
+   step, handed to the next step in the chain.
+
+   If DO_FLUSH is nonzero no input is converted and the flush request is
+   passed down the chain.
+
+   Returns one of the GCONV_* status codes; GCONV_ILLEGAL_INPUT denotes an
+   input character without counterpart in the target charset and
+   GCONV_INCOMPLETE_INPUT a truncated wide character at the end of the
+   input.  If WRITTEN is not NULL and this is the last step, *WRITTEN
+   receives the number of characters converted.  */
+int
+gconv (struct gconv_step *step, struct gconv_step_data *data,
+       const char *inbuf, size_t *inbufsize, size_t *written, int do_flush)
+{
+  size_t do_write = 0;
+  int result;
+
+  /* If the function is called with no input this means we have to reset
+     to the initial state.  The possibly partly converted input is
+     dropped.  */
+  if (do_flush)
+    {
+      /* Call the steps down the chain if there are any.  The next step is
+         only looked at when one exists.  */
+      if (data->is_last)
+        result = GCONV_OK;
+      else
+        {
+          struct gconv_step *next_step = step + 1;
+          struct gconv_step_data *next_data = data + 1;
+
+          result = (*next_step->fct) (next_step, next_data, NULL, NULL,
+                                      written, 1);
+
+          /* Clear output buffer.  */
+          data->outbufavail = 0;
+        }
+    }
+  else
+    {
+      enum direction dir = ((struct iso646_data *) data->data)->dir;
+      enum variant var = ((struct iso646_data *) data->data)->var;
+
+      do
+        {
+          result = GCONV_OK;
+
+          if (dir == from_iso646)
+            {
+              size_t inchars = *inbufsize;
+              size_t outwchars = data->outbufavail;
+              size_t cnt = 0;
+
+              while (cnt < inchars
+                     && outwchars + sizeof (wchar_t) <= data->outbufsize)
+                {
+                  wchar_t ucs;
+
+                  if (!iso646_to_ucs4 (var, (unsigned char) inbuf[cnt], &ucs))
+                    {
+                      /* Not a 7-bit character.  */
+                      result = GCONV_ILLEGAL_INPUT;
+                      break;
+                    }
+
+                  *((wchar_t *) (data->outbuf + outwchars)) = ucs;
+                  ++do_write;
+                  outwchars += sizeof (wchar_t);
+                  ++cnt;
+                }
+
+              /* Account for the consumed input and continue behind it in
+                 the next round.  */
+              *inbufsize -= cnt;
+              inbuf += cnt;
+              data->outbufavail = outwchars;
+            }
+          else
+            {
+              size_t inwchars = *inbufsize;
+              size_t outchars = data->outbufavail;
+              size_t cnt = 0;
+
+              while (inwchars >= cnt + sizeof (wchar_t)
+                     && outchars < data->outbufsize)
+                {
+                  if (!ucs4_to_iso646 (var,
+                                       *((const wchar_t *) (inbuf + cnt)),
+                                       &data->outbuf[outchars]))
+                    /* Here is where the transliteration would enter the
+                       scene.  */
+                    break;
+
+                  ++do_write;
+                  ++outchars;
+                  cnt += sizeof (wchar_t);
+                }
+
+              /* Account for the consumed input and continue behind it in
+                 the next round.  */
+              *inbufsize -= cnt;
+              inbuf += cnt;
+              data->outbufavail = outchars;
+
+              if (outchars < data->outbufsize)
+                {
+                  /* If there is still room in the output buffer something
+                     is wrong with the input.  */
+                  if (inwchars >= cnt + sizeof (wchar_t))
+                    /* An error occurred.  */
+                    result = GCONV_ILLEGAL_INPUT;
+                  else if (inwchars != cnt)
+                    /* There are some unprocessed bytes at the end of the
+                       input buffer.  */
+                    result = GCONV_INCOMPLETE_INPUT;
+                }
+            }
+
+          if (result != GCONV_OK)
+            break;
+
+          if (data->is_last)
+            {
+              /* This is the last step.  */
+              result = (*inbufsize > (dir == from_iso646
+                                      ? 0 : sizeof (wchar_t) - 1)
+                        ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
+              break;
+            }
+
+          /* Status so far.  */
+          result = GCONV_EMPTY_INPUT;
+
+          if (data->outbufavail > 0)
+            {
+              /* Call the functions below in the chain.  */
+              struct gconv_step *next_step = step + 1;
+              struct gconv_step_data *next_data = data + 1;
+              size_t newavail = data->outbufavail;
+
+              result = (*next_step->fct) (next_step, next_data, data->outbuf,
+                                          &newavail, written, 0);
+
+              /* Correct the output buffer.  This must also happen when
+                 everything was consumed; otherwise the buffer would still
+                 look full and the same data would be passed on again.  */
+              if (newavail != data->outbufavail)
+                {
+                  if (newavail > 0)
+                    memmove (data->outbuf,
+                             &data->outbuf[data->outbufavail - newavail],
+                             newavail);
+                  data->outbufavail = newavail;
+                }
+            }
+        }
+      while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT);
+    }
+
+  if (written != NULL && data->is_last)
+    *written = do_write;
+
+  return result;
+}