aboutsummaryrefslogtreecommitdiff
path: root/iconv/gconv_simple.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1998-04-20 18:41:05 +0000
committerUlrich Drepper <drepper@redhat.com>1998-04-20 18:41:05 +0000
commit8619129f3f0d5a9db6208be5bae6c2a8c9ce61a5 (patch)
tree033b6528f39a85f12db9d0859dbd1b90c2906eee /iconv/gconv_simple.c
parentf1fa8b68f3e7623a3ef86dcd0c7d090ccf0389f5 (diff)
downloadglibc-8619129f3f0d5a9db6208be5bae6c2a8c9ce61a5.zip
glibc-8619129f3f0d5a9db6208be5bae6c2a8c9ce61a5.tar.gz
glibc-8619129f3f0d5a9db6208be5bae6c2a8c9ce61a5.tar.bz2
Update.
1998-04-20 18:00 Ulrich Drepper <drepper@cygnus.com> * libc.map: Add __dgettext to GLIBC_2.0 and __libc_longjmp, and __libc_siglongjmp to GLIBC_2.1. * elf/dl-minimal.c (__assert_perror_fail): Don't use strerror, use __strerror_r. * iconv/Makefile: Don't run tests now. * iconv/iconv_prog.c (process_block): If loop is repeated, call iconv with correct output buffer. Major rewrite of the low-level gconv functionality. * iconv/gconv.c: Rewritten. * iconv/gconv.h: Likewise. * iconv/gconv_builtin.c: Likewise. * iconv/gconv_builtin.h: Likewise. * iconv/gconv_conf.c: Likewise. * iconv/gconv_int.h: Likewise. * iconv/gconv_open.c: Likewise. * iconv/gconv_simple.c: Likewise. * iconv/iconv.c: Likewise. * iconvdata/8bit-gap.c: Likewise. * iconvdata/8bit-generic.c: Likewise. * iconvdata/Makefile: Likewise. * iconvdata/big5.c: Likewise. * iconvdata/cns11643.c: Likewise. * iconvdata/cns11643.h: Likewise. * iconvdata/cns11643l1.c: Likewise. * iconvdata/cns11643l1.h: Likewise. * iconvdata/ebcdic-at-de-a.c: Likewise. * iconvdata/ebcdic-at-de.c: Likewise. * iconvdata/ebcdic-ca-fr.c: Likewise. * iconvdata/euccn.c: Likewise. * iconvdata/eucjp.c: Likewise. * iconvdata/euckr.c: Likewise. * iconvdata/euctw.c: Likewise. * iconvdata/gb2312.c: Likewise. * iconvdata/gb2312.h: Likewise. * iconvdata/hp-roman8.c: Likewise. * iconvdata/iso646.c: Likewise. * iconvdata/iso6937.c: Likewise. * iconvdata/iso8859-1.c: Likewise. * iconvdata/iso8859-10.c: Likewise. * iconvdata/iso8859-2.c: Likewise. * iconvdata/iso8859-3.c: Likewise. * iconvdata/iso8859-4.c: Likewise. * iconvdata/iso8859-5.c: Likewise. * iconvdata/iso8859-6.c: Likewise. * iconvdata/iso8859-7.c: Likewise. * iconvdata/iso8859-8.c: Likewise. * iconvdata/iso8859-9.c: Likewise. * iconvdata/jis0201.c: Likewise. * iconvdata/jis0201.h: Likewise. * iconvdata/jis0208.c: Likewise. * iconvdata/jis0208.h: Likewise. * iconvdata/jis0212.c: Likewise. * iconvdata/jis0212.h: Likewise. * iconvdata/johab.c: Likewise. * iconvdata/koi-8.c: Likewise. 
* iconvdata/koi8-r.c: Likewise. * iconvdata/ksc5601.c: Likewise. * iconvdata/ksc5601.h: Likewise. * iconvdata/latin-greek-1.c: Likewise. * iconvdata/latin-greek.c: Likewise. * iconvdata/run-iconv-test.sh: Likewise. * iconvdata/sjis.c: Likewise. * iconvdata/t61.c: Likewise. * iconvdata/uhc.c: Likewise. * wcsmbs/btowc.c: Likewise. * wcsmbs/mbrtowc.c: Likewise. * wcsmbs/mbsnrtowcs.c: Likewise. * wcsmbs/mbsrtowcs.c: Likewise. * wcsmbs/wcrtomb.c: Likewise. * wcsmbs/wcsmbsload.c: Likewise. * wcsmbs/wcsnrtombs.c: Likewise. * wcsmbs/wcsrtombs.c: Likewise. * wcsmbs/wctob.c: Likewise. * iconv/loop.c: New file. * iconv/skeleton.c: New file. * stdlib/mblen.c: Handle empty input string correctly. * stdlib/mbtowc.c: Likewise. * posix/getopt.c: Various cleanups. * sysdeps/arm/bits/setjmp.h: Add copyright text. * sysdeps/i386/bits/setjmp.h: Likewise. * sysdeps/m68k/bits/setjmp.h: Likewise. * sysdeps/powerpc/bits/setjmp.h: Likewise. * sysdeps/sparc/sparc32/bits/setjmp.h: Likewise. * sysdeps/generic/longjmp.c: Rename function to __libc_siglongjmp and make longjmp weak alias. 1998-04-18 20:29 Philip Blundell <Philip.Blundell@pobox.com> * iconv/Makefile (routines): Only include gconv_dl if building for an ELF system - dynamic linking is not available on a.out. (CFLAGS-gconv_conf.c): Define STATIC_GCONV if omitting gconv_dl due to above check. * iconv/gconv_db.c: If STATIC_GCONV defined, don't try to call routines from gconv_dl. 1998-04-17 Gordon Matzigkeit <gord@profitpress.com> * csu/init.c (_IO_stdin_used): Protect with USE_IN_LIBIO so that we can compile without libio. 1998-04-20 16:28 Ulrich Drepper <drepper@cygnus.com> * sysdeps/mach/hurd/Subdirs: Remove login. 1998-04-11 Gordon Matzigkeit <gord@profitpress.com> * db2/compat.h: Include <errno.h>, to make sure we get the definition of EFTYPE before we define it ourselves. 1998-04-10 Gordon Matzigkeit <gord@profitpress.com> * sysdeps/generic/bits/socket.h: Protect against multiple inclusion. 
* sysdeps/mach/hurd/bits/ioctls.h: Likewise. Fix typo to allow inclusion from sys/ioctl.h again. 1998-04-16 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * iconvdata/*.[ch]: Clean up namespace. Optimize character lookup. 1998-04-16 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * libc.map: Export __strerror_r. Remove _strerror_internal. 1998-04-16 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * sysdeps/generic/strcasestr.c: Undefine strcasestr, not strstr. Also undefine __strcasestr. 1998-04-16 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * posix/regex.c: Rename __re_max_failures back to re_max_failures, aliases do not work with global variables due to copy relocations. 1998-04-20 15:12 Ulrich Drepper <drepper@cygnus.com> * manual/creature.texi: Fix type. Patch by Andreas Schwab. 1998-04-20 13:47 Ulrich Drepper <drepper@cygnus.com> * signal/sighold.c: Include stddef.h for NULL definition. * signal/sigrelse.c: Likewise. * sysdeps/posix/sigignore.c: Likewise. * sysdeps/posix/sigset.c: Likewise. * sysdeps/posix/waitid.c: Likewise. * sysdeps/unix/sysv/linux/rt_sigsuspend.c: Likewise. * sysdeps/unix/sysv/linux/rt_sigtimedwait.c: Likewise. * sysdeps/unix/sysv/linux/sigwaitinfo.c: Likewise. * wcsmbs/mbsrtowcs.c: Include stdlib.h for MB_CUR_MAX. Patch by Franz Sirl <Franz.Sirl-kernel@lauterbach.com>. 1998-04-13 Mark Kettenis <kettenis@phys.uva.nl> * login/Makefile (headers): Remove utmpx.h and bits/utmpx.h. * login/getutent.c (getutxent): Remove alias. * login/getutent_r.c (setutxent, pututxline, endutxent): Remove aliases. * login/getutid.c (getutxid): Remove alias. * login/getutline.c (getutxline): Remove alias. * login/utmp.h: Add prototypes for __updwtmp, __getutent, __getutid, __getutline and __pututline. * login/utmpx.h: Moved to ... * sysdeps/gnu/utmpx.h: ... here. [__USE_GNU]: Define UTMPX_FILE, UTMPX_FILENAME, WTMPX_FILE and WTMPX_FILENAME, declare utmpxname and updwtmpx. * login/updwtmp.c: Moved to ... 
* sysdeps/generic/updwtmp.c: ... here. (updwtmp): Generalized by allowing file name transformation. * sysdeps/gnu/updwtmp.c: New file. Use generic implementation with additional file name transformation. * sysdeps/unix/sysv/linux/updwtmp.c: Likewise. * login/utmp_file.c: Moved to ... * sysdeps/generic/utmp_file.c: ... here. (setutent_file): Generalized by allowing file name transformation. Do not print error message. Library functions should not print them. Reported by Jim Meyering. * sysdeps/gnu/utmp_file.c: New file. Use generic implementation with additional file name transformation. * sysdeps/unix/sysv/linux/utmp_file.c: Likewise. * sysdeps/gnu/Makefile [$(subdir)=login] (sysdep_routines): Add setutxent, getutxent, endutxent, getutxid, getutxid, getutxline, pututxline, utmpxname and updwtmpx. (sysdep_headers): Add utmpx.h and bits/utmpx.h. * sysdeps/gnu/bits/utmpx.h [__USE_GNU] Include paths.h. (_PATH_UTMPX): Define to _PATH_UTMP. (_PATH_WTMPX): Define to _PATH_WTMPX. (RUN_LVL): Define only if __USE_GNU. (ACCOUNTING): Define if __USE_GNU. * sysdeps/gnu/setutxent.c: New file. * sysdeps/gnu/getutxent.c: New file. * sysdeps/gnu/endutxent.c: New file. * sysdeps/gnu/getutxid.c: New file. * sysdeps/gnu/getutxline.c: New file. * sysdeps/gnu/pututxline.c: New file. * sysdeps/gnu/utmpxname.c: New file. * sysdeps/gnu/updwtmpx.c: New file. * sysdeps/unix/sysv/linux/paths.h (_PATH_UTMP_DB): Remove. * sysdeps/generic/bits/utmpx.h: Remove. 1998-04-20 Ulrich Drepper <drepper@cygnus.com> * posix/wordexp-test.c (main): Initialize ifs element of ts for ~root test. 1998-04-17 07:53 H.J. Lu <hjl@gnu.org> * sysdeps/unix/sysv/linux/i386/s_pread64.S: Fix a typo. 1998-04-17 11:32 Ulrich Drepper <drepper@cygnus.com> * libio/oldfileops.c (_IO_old_file_seekoff): Define temporary st variable using _G_stat64. * libio/fileops.c: Remove macro definition of fstat, it is in the global header. Reported by Thorsten Kukuk <kukuk@weber.uni-paderborn.de>. 
1998-04-17 Philip Blundell <pb@nexus.co.uk> * sysdeps/arm/strlen.S: New file, based on code by Matthew Wilcox <willy@odie.barnet.ac.uk>. 1998-04-16 Philip Blundell <Philip.Blundell@pobox.com> * inet/netinet/in.h (IN6_IS_ADDR_MC_NODELOCAL): New macro, required by IPv6 Basic API. (IN6_IS_ADDR_MC_LINKLOCAL): Likewise. (IN6_IS_ADDR_MC_SITELOCAL): Likewise. (IN6_IS_ADDR_MC_ORGLOCAL): Likewise. (IN6_IS_ADDR_MC_GLOBAL): Likewise.
Diffstat (limited to 'iconv/gconv_simple.c')
-rw-r--r--iconv/gconv_simple.c1220
1 files changed, 308 insertions, 912 deletions
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index b72e61e..f2fec12 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -35,7 +35,7 @@
/* These are definitions used by some of the functions for handling
UTF-8 encoding below. */
-static const wchar_t encoding_mask[] =
+static const uint32_t encoding_mask[] =
{
~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
};
@@ -49,8 +49,8 @@ static const unsigned char encoding_byte[] =
int
__gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
- const char *inbuf, size_t *inlen, size_t *written,
- int do_flush)
+ const char **inbuf, const char *inbufend,
+ size_t *written, int do_flush)
{
size_t do_write;
@@ -60,12 +60,12 @@ __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
do_write = 0;
else
{
- do_write = MIN (*inlen, data->outbufsize - data->outbufavail);
+ do_write = MIN (inbufend - *inbuf, data->outbufend - data->outbuf);
memcpy (data->outbuf, inbuf, do_write);
- *inlen -= do_write;
- data->outbufavail += do_write;
+ *inbuf -= do_write;
+ *data->outbuf += do_write;
}
/* ### TODO Actually, this number must be divided according to the
@@ -83,934 +83,330 @@ __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
format is, if any, the endianness. The Unicode/ISO 10646 says that
unless some higher protocol specifies it differently, the byte
order is big endian.*/
-int
-__gconv_transform_internal_ucs4 (struct gconv_step *step,
- struct gconv_step_data *data,
- const char *inbuf, size_t *inlen,
- size_t *written, int do_flush)
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 4
+#define MIN_NEEDED_TO 4
+#define FROM_DIRECTION 1
+#define FROM_LOOP internal_ucs4_loop
+#define TO_LOOP internal_ucs4_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_internal_ucs4
+
+
+static inline int
+internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
+ unsigned char **outptrp, unsigned char *outend,
+ mbstate_t *state, void *data, size_t *converted)
{
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
- gconv_fct fct = next_step->fct;
- size_t do_write = 0;
+ const unsigned char *inptr = *inptrp;
+ unsigned char *outptr = *outptrp;
+ size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
int result;
- /* If the function is called with no input this means we have to reset
- to the initial state. The possibly partly converted input is
- dropped. */
- if (do_flush)
- {
- /* Clear the state. */
- memset (data->statep, '\0', sizeof (mbstate_t));
-
- /* Call the steps down the chain if there are any. */
- if (data->is_last)
- result = GCONV_OK;
- else
- {
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
-
- result = (*fct) (next_step, next_data, NULL, 0, written, 1);
-
- /* Clear output buffer. */
- data->outbufavail = 0;
- }
- }
- else
- {
- int save_errno = errno;
-
- result = GCONV_OK;
- do
- {
- size_t n_convert = (MIN (*inlen,
- (data->outbufsize - data->outbufavail))
- / sizeof (wchar_t));
-
#if __BYTE_ORDER == __LITTLE_ENDIAN
- /* Sigh, we have to do some real work. */
- wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
- size_t cnt;
+ /* Sigh, we have to do some real work. */
+ size_t cnt;
- for (cnt = 0; cnt < n_convert; ++cnt)
- outbuf[cnt] = bswap_32 (((wchar_t *) inbuf)[cnt]);
+ for (cnt = 0; cnt < n_convert; ++cnt)
+ *((uint32_t *) outptr)++ = bswap_32 (*((uint32_t *) inptr)++);
+ *inptrp = inptr;
+ *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
- /* Simply copy the data. */
- memcpy (&data->outbuf[data->outbufsize], inbuf,
- n_convert * sizeof (wchar_t));
+ /* Simply copy the data. */
+ *inptrp = inptr + n_convert * 4;
+ *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif
- *inlen -= n_convert * sizeof (wchar_t);
- inbuf += n_convert * sizeof (wchar_t);
- data->outbufavail += n_convert * sizeof (wchar_t);
- do_write += n_convert;
-
- if (*inlen > 0 && *inlen < sizeof (wchar_t))
- {
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
- break;
- }
-
- if (data->is_last)
- {
- /* This is the last step. */
- result = (*inlen < sizeof (wchar_t)
- ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
- break;
- }
-
- /* Status so far. */
- result = GCONV_EMPTY_INPUT;
-
- if (data->outbufavail > 0)
- {
- /* Call the functions below in the chain. */
- size_t newavail = data->outbufavail;
-
- result = (*fct) (next_step, next_data, data->outbuf, &newavail,
- written, 0);
-
- /* Correct the output buffer. */
- if (newavail != data->outbufavail && newavail > 0)
- {
- memmove (data->outbuf,
- &data->outbuf[data->outbufavail - newavail],
- newavail);
- data->outbufavail = newavail;
- }
- }
- }
- while (*inlen >= sizeof (wchar_t) && result == GCONV_EMPTY_INPUT);
-
- __set_errno (save_errno);
- }
-
- if (written != NULL && data->is_last)
- *written = do_write;
-
- return result;
-}
-
-
-/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
-int
-__gconv_transform_ascii_internal (struct gconv_step *step,
- struct gconv_step_data *data,
- const char *inbuf, size_t *inlen,
- size_t *written, int do_flush)
-{
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
- gconv_fct fct = next_step->fct;
- size_t do_write = 0;
- int result;
-
- /* If the function is called with no input this means we have to reset
- to the initial state. The possibly partly converted input is
- dropped. */
- if (do_flush)
- {
- /* Clear the state. */
- memset (data->statep, '\0', sizeof (mbstate_t));
-
- /* Call the steps down the chain if there are any. */
- if (data->is_last)
- result = GCONV_OK;
- else
- {
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
-
- result = (*fct) (next_step, next_data, NULL, 0, written, 1);
-
- /* Clear output buffer. */
- data->outbufavail = 0;
- }
- }
- else
- {
- const unsigned char *newinbuf = inbuf;
- int save_errno = errno;
-
- result = GCONV_OK;
- do
- {
- size_t actually = 0;
- size_t cnt = 0;
-
- while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize
- && cnt < *inlen)
- {
- if (*newinbuf > '\x7f')
- {
- /* This is no correct ANSI_X3.4-1968 character. */
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
-
- /* It's an one byte sequence. */
- *(wchar_t *) &data->outbuf[data->outbufavail]
- = (wchar_t) *newinbuf;
- data->outbufavail += sizeof (wchar_t);
- ++actually;
-
- ++newinbuf;
- ++cnt;
- }
-
- /* Remember how much we converted. */
- do_write += cnt * sizeof (wchar_t);
- *inlen -= cnt;
-
- /* Check whether an illegal character appeared. */
- if (result != GCONV_OK)
- break;
-
- if (data->is_last)
- {
- /* This is the last step. */
- result = (*inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
- break;
- }
-
- /* Status so far. */
- result = GCONV_EMPTY_INPUT;
-
- if (data->outbufavail > 0)
- {
- /* Call the functions below in the chain. */
- size_t newavail = data->outbufavail;
-
- result = (*fct) (next_step, next_data, data->outbuf, &newavail,
- written, 0);
-
- /* Correct the output buffer. */
- if (newavail != data->outbufavail && newavail > 0)
- {
- memmove (data->outbuf,
- &data->outbuf[data->outbufavail - newavail],
- newavail);
- data->outbufavail = newavail;
- }
- }
- }
- while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
-
- __set_errno (save_errno);
- }
-
- if (written != NULL && data->is_last)
- *written = do_write / sizeof (wchar_t);
-
- return result;
-}
-
-
-/* Convert from ISO 10646/UCS to ISO 646-IRV. */
-int
-__gconv_transform_internal_ascii (struct gconv_step *step,
- struct gconv_step_data *data,
- const char *inbuf, size_t *inlen,
- size_t *written, int do_flush)
-{
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
- gconv_fct fct = next_step->fct;
- size_t do_write;
- int result;
-
- /* If the function is called with no input this means we have to reset
- to the initial state. The possibly partly converted input is
- dropped. */
- if (do_flush)
- {
- /* Clear the state. */
- memset (data->statep, '\0', sizeof (mbstate_t));
- do_write = 0;
-
- /* Call the steps down the chain if there are any. */
- if (data->is_last)
- result = GCONV_OK;
- else
- {
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
-
- result = (*fct) (next_step, next_data, NULL, 0, written, 1);
-
- /* Clear output buffer. */
- data->outbufavail = 0;
- }
- }
- else
- {
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
- int save_errno = errno;
- do_write = 0;
-
- result = GCONV_OK;
- do
- {
- size_t actually = 0;
- size_t cnt = 0;
-
- while (data->outbufavail < data->outbufsize
- && cnt + 3 < *inlen)
- {
- if (*newinbuf < L'\0' || *newinbuf > L'\x7f')
- {
- /* This is no correct ANSI_X3.4-1968 character. */
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
-
- /* It's an one byte sequence. */
- data->outbuf[data->outbufavail++] = (char) *newinbuf;
- ++actually;
-
- ++newinbuf;
- cnt += sizeof (wchar_t);
- }
-
- /* Remember how much we converted. */
- do_write += cnt / sizeof (wchar_t);
- *inlen -= cnt;
-
- /* Check whether an illegal character appeared. */
- if (result != GCONV_OK)
- break;
-
- /* Check for incomplete input. */
- if (*inlen > 0 && *inlen < sizeof (wchar_t))
- {
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
- break;
- }
-
- if (data->is_last)
- {
- /* This is the last step. */
- result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
- break;
- }
-
- /* Status so far. */
- result = GCONV_EMPTY_INPUT;
-
- if (data->outbufavail > 0)
- {
- /* Call the functions below in the chain. */
- size_t newavail = data->outbufavail;
-
- result = (*fct) (next_step, next_data, data->outbuf, &newavail,
- written, 0);
-
- /* Correct the output buffer. */
- if (newavail != data->outbufavail && newavail > 0)
- {
- memmove (data->outbuf,
- &data->outbuf[data->outbufavail - newavail],
- newavail);
- data->outbufavail = newavail;
- }
- }
- }
- while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
-
- __set_errno (save_errno);
- }
-
- if (written != NULL && data->is_last)
- *written = do_write;
-
- return result;
-}
-
-
-int
-__gconv_transform_internal_utf8 (struct gconv_step *step,
- struct gconv_step_data *data,
- const char *inbuf, size_t *inlen,
- size_t *written, int do_flush)
-{
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
- gconv_fct fct = next_step->fct;
- size_t do_write;
- int result;
-
- /* If the function is called with no input this means we have to reset
- to the initial state. The possibly partly converted input is
- dropped. */
- if (do_flush)
- {
- /* Clear the state. */
- memset (data->statep, '\0', sizeof (mbstate_t));
- do_write = 0;
-
- /* Call the steps down the chain if there are any. */
- if (data->is_last)
- result = GCONV_OK;
- else
- {
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
-
- result = (*fct) (next_step, next_data, NULL, 0, written, 1);
-
- /* Clear output buffer. */
- data->outbufavail = 0;
- }
- }
- else
- {
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
- int save_errno = errno;
- do_write = 0;
-
- result = GCONV_OK;
- do
- {
- size_t cnt = 0;
-
- while (data->outbufavail < data->outbufsize
- && cnt * sizeof (wchar_t) + 3 < *inlen)
- {
- wchar_t wc = newinbuf[cnt];
-
- if (wc < 0 && wc > 0x7fffffff)
- {
- /* This is no correct ISO 10646 character. */
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
-
- if (wc < 0x80)
- /* It's an one byte sequence. */
- data->outbuf[data->outbufavail++] = (char) wc;
- else
- {
- size_t step;
- size_t start;
-
- for (step = 2; step < 6; ++step)
- if ((wc & encoding_mask[step - 2]) == 0)
- break;
-
- if (data->outbufavail + step >= data->outbufsize)
- /* Too long. */
- break;
-
- start = data->outbufavail;
- data->outbufavail += step;
- data->outbuf[start] = encoding_byte[step - 2];
- --step;
- do
- {
- data->outbuf[start + step] = 0x80 | (wc & 0x3f);
- wc >>= 6;
- }
- while (--step > 0);
- data->outbuf[start] |= wc;
- }
-
- ++cnt;
- }
-
- /* Remember how much we converted. */
- do_write += cnt;
- *inlen -= cnt * sizeof (wchar_t);
- newinbuf += cnt;
-
- /* Check whether an illegal character appeared. */
- if (result != GCONV_OK)
- break;
-
- /* Check for incomplete input. */
- if (*inlen > 0 && *inlen < sizeof (wchar_t))
- {
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
- break;
- }
-
- if (data->is_last)
- {
- /* This is the last step. */
- result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
- break;
- }
-
- /* Status so far. */
- result = GCONV_EMPTY_INPUT;
-
- if (data->outbufavail > 0)
- {
- /* Call the functions below in the chain. */
- size_t newavail = data->outbufavail;
-
- result = (*fct) (next_step, next_data, data->outbuf, &newavail,
- written, 0);
-
- /* Correct the output buffer. */
- if (newavail != data->outbufavail && newavail > 0)
- {
- memmove (data->outbuf,
- &data->outbuf[data->outbufavail - newavail],
- newavail);
- data->outbufavail = newavail;
- }
- }
- }
- while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
-
- __set_errno (save_errno);
- }
-
- if (written != NULL && data->is_last)
- *written = do_write;
-
- return result;
-}
-
-
-int
-__gconv_transform_utf8_internal (struct gconv_step *step,
- struct gconv_step_data *data,
- const char *inbuf, size_t *inlen,
- size_t *written, int do_flush)
-{
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
- gconv_fct fct = next_step->fct;
- size_t do_write;
- int result;
-
- /* If the function is called with no input this means we have to reset
- to the initial state. The possibly partly converted input is
- dropped. */
- if (do_flush)
- {
- /* Clear the state. */
- memset (data->statep, '\0', sizeof (mbstate_t));
- do_write = 0;
-
- /* Call the steps down the chain if there are any. */
- if (data->is_last)
- result = GCONV_OK;
- else
- {
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
-
- result = (*fct) (next_step, next_data, NULL, 0, written, 1);
- }
- }
+ /* Determine the status. */
+ if (*outptrp == outend)
+ result = GCONV_FULL_OUTPUT;
+ else if (*inptrp == inend)
+ result = GCONV_EMPTY_INPUT;
else
- {
- int save_errno = errno;
- int extra = 0;
- do_write = 0;
-
- result = GCONV_OK;
- do
- {
- wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
- size_t cnt = 0;
- size_t actually = 0;
-
- while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize
- && cnt < *inlen)
- {
- size_t start = cnt;
- wchar_t value;
- unsigned char byte;
- int count;
-
- /* Next input byte. */
- byte = inbuf[cnt++];
-
- if (byte < 0x80)
- {
- /* One byte sequence. */
- count = 0;
- value = byte;
- }
- else if ((byte & 0xe0) == 0xc0)
- {
- count = 1;
- value = byte & 0x1f;
- }
- else if ((byte & 0xf0) == 0xe0)
- {
- /* We expect three bytes. */
- count = 2;
- value = byte & 0x0f;
- }
- else if ((byte & 0xf8) == 0xf0)
- {
- /* We expect four bytes. */
- count = 3;
- value = byte & 0x07;
- }
- else if ((byte & 0xfc) == 0xf8)
- {
- /* We expect five bytes. */
- count = 4;
- value = byte & 0x03;
- }
- else if ((byte & 0xfe) == 0xfc)
- {
- /* We expect six bytes. */
- count = 5;
- value = byte & 0x01;
- }
- else
- {
- /* This is an illegal encoding. */
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
-
- if (cnt + count > *inlen)
- {
- /* We don't have enough input. */
- --cnt;
- extra = count;
- break;
- }
-
- /* Read the possible remaining bytes. */
- while (count > 0)
- {
- byte = inbuf[cnt++];
- --count;
-
- if ((byte & 0xc0) != 0x80)
- {
- /* This is an illegal encoding. */
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
-
- value <<= 6;
- value |= byte & 0x3f;
- }
-
- if (result != GCONV_OK)
- {
- cnt = start;
- break;
- }
-
- *outbuf++ = value;
- ++actually;
- }
-
- /* Remember how much we converted. */
- do_write += actually;
- *inlen -= cnt;
- inbuf += cnt;
-
- data->outbufavail += actually * sizeof (wchar_t);
-
- /* Check whether an illegal character appeared. */
- if (result != GCONV_OK)
- {
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
-
- if (*inlen > 0 && *inlen < extra)
- {
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
- break;
- }
-
- if (data->is_last)
- {
- /* This is the last step. */
- result = (data->outbufavail + sizeof (wchar_t) > data->outbufsize
- ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
- break;
- }
-
- /* Status so far. */
- result = GCONV_EMPTY_INPUT;
-
- if (data->outbufavail > 0)
- {
- /* Call the functions below in the chain. */
- size_t newavail = data->outbufavail;
-
- result = (*fct) (next_step, next_data, data->outbuf, &newavail,
- written, 0);
-
- /* Correct the output buffer. */
- if (newavail != data->outbufavail && newavail > 0)
- {
- memmove (data->outbuf,
- &data->outbuf[data->outbufavail - newavail],
- newavail);
- data->outbufavail = newavail;
- }
- }
- }
- while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
-
- __set_errno (save_errno);
- }
+ result = GCONV_INCOMPLETE_INPUT;
- if (written != NULL && data->is_last)
- *written = do_write;
+ if (converted != NULL)
+ converted += n_convert;
return result;
}
+#include <iconv/skeleton.c>
-int
-__gconv_transform_ucs2_internal (struct gconv_step *step,
- struct gconv_step_data *data,
- const char *inbuf, size_t *inlen,
- size_t *written, int do_flush)
-{
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
- gconv_fct fct = next_step->fct;
- size_t do_write;
- int result;
-
- /* If the function is called with no input this means we have to reset
- to the initial state. The possibly partly converted input is
- dropped. */
- if (do_flush)
- {
- /* Clear the state. */
- memset (data->statep, '\0', sizeof (mbstate_t));
- do_write = 0;
-
- /* Call the steps down the chain if there are any. */
- if (data->is_last)
- result = GCONV_OK;
- else
- {
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
-
- result = (*fct) (next_step, next_data, NULL, 0, written, 1);
- }
- }
- else
- {
- const uint16_t *newinbuf = (const uint16_t *) inbuf;
- int save_errno = errno;
- do_write = 0;
-
- do
- {
- wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
- size_t actually = 0;
-
- errno = 0;
- while (data->outbufavail + 4 <= data->outbufsize
- && *inlen >= 2)
- {
+/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 1
+#define MIN_NEEDED_TO 4
+#define FROM_DIRECTION 1
+#define FROM_LOOP ascii_internal_loop
+#define TO_LOOP ascii_internal_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_ascii_internal
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+#define BODY \
+ { \
+ if (*inptr > '\x7f') \
+ { \
+ /* This is not a valid ANSI_X3.4-1968 character. */ \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ \
+ /* It's a one-byte sequence. */ \
+ *((uint32_t *) outptr)++ = *inptr++; \
+ }
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
+
+
+/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 4
+#define MIN_NEEDED_TO 1
+#define FROM_DIRECTION 1
+#define FROM_LOOP internal_ascii_loop
+#define TO_LOOP internal_ascii_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_internal_ascii
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+#define BODY \
+ { \
+ if (*((uint32_t *) inptr) > '\x7f') \
+ { \
+ /* This is not a valid ANSI_X3.4-1968 character. */ \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ \
+ /* It's a one-byte sequence. */ \
+ *outptr++ = *((uint32_t *) inptr)++; \
+ }
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
+
+
+/* Convert from the internal (UCS4-like) format to UTF-8. */
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 4
+#define MIN_NEEDED_TO 1
+#define MAX_NEEDED_TO 6
+#define FROM_DIRECTION 1
+#define FROM_LOOP internal_utf8_loop
+#define TO_LOOP internal_utf8_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_internal_utf8
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+#define BODY \
+ { \
+ uint32_t wc = *((uint32_t *) inptr); \
+ \
+ /* Since we control every character we read this cannot happen. */ \
+ assert (wc <= 0x7fffffff); \
+ \
+ if (wc < 0x80) \
+ /* It's a one-byte sequence. */ \
+ *outptr++ = (unsigned char) wc; \
+ else \
+ { \
+ size_t step; \
+ char *start; \
+ \
+ for (step = 2; step < 6; ++step) \
+ if ((wc & encoding_mask[step - 2]) == 0) \
+ break; \
+ \
+ if (outptr + step >= outend) \
+ { \
+ /* Too long. */ \
+ result = GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
+ start = outptr; \
+ *outptr = encoding_byte[step - 2]; \
+ outptr += step; \
+ --step; \
+ do \
+ { \
+ start[step] = 0x80 | (wc & 0x3f); \
+ wc >>= 6; \
+ } \
+ while (--step > 0); \
+ start[0] |= wc; \
+ } \
+ \
+ inptr += 4; \
+ }
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
+
+
+/* Convert from UTF-8 to the internal (UCS4-like) format. */
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 1
+#define MAX_NEEDED_FROM 6
+#define MIN_NEEDED_TO 4
+#define FROM_DIRECTION 1
+#define FROM_LOOP utf8_internal_loop
+#define TO_LOOP utf8_internal_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_utf8_internal
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+#define BODY \
+ { \
+ uint32_t ch; \
+ uint_fast32_t cnt; \
+ uint_fast32_t i; \
+ \
+ /* Next input byte. */ \
+ ch = *inptr; \
+ \
+ if (ch < 0x80) \
+ /* One byte sequence. */ \
+ cnt = 1; \
+ else if ((ch & 0xe0) == 0xc0) \
+ { \
+ cnt = 2; \
+ ch &= 0x1f; \
+ } \
+ else if ((ch & 0xf0) == 0xe0) \
+ { \
+ /* We expect three bytes. */ \
+ cnt = 3; \
+ ch &= 0x0f; \
+ } \
+ else if ((ch & 0xf8) == 0xf0) \
+ { \
+ /* We expect four bytes. */ \
+ cnt = 4; \
+ ch &= 0x07; \
+ } \
+ else if ((ch & 0xfc) == 0xf8) \
+ { \
+ /* We expect five bytes. */ \
+ cnt = 5; \
+ ch &= 0x03; \
+ } \
+ else if ((ch & 0xfe) == 0xfc) \
+ { \
+ /* We expect six bytes. */ \
+ cnt = 6; \
+ ch &= 0x01; \
+ } \
+ else \
+ { \
+ /* This is an illegal encoding. */ \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ \
+ if (NEED_LENGTH_TEST && inptr + cnt >= inend) \
+ { \
+ /* We don't have enough input. */ \
+ result = GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ \
+ /* Read the possible remaining bytes. */ \
+ for (i = 1; i < cnt; ++i) \
+ { \
+ uint32_t byte = inptr[i]; \
+ \
+ if ((byte & 0xc0) != 0x80) \
+ { \
+ /* This is an illegal encoding. */ \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ \
+ ch <<= 6; \
+ ch |= byte & 0x3f; \
+ } \
+ \
+ /* Now adjust the pointers and store the result. */ \
+ inptr += cnt; \
+ *((uint32_t *) outptr)++ = ch; \
+ }
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
+
+
+/* Convert from UCS2 to the internal (UCS4-like) format. */
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 2
+#define MIN_NEEDED_TO 4
+#define FROM_DIRECTION 1
+#define FROM_LOOP ucs2_internal_loop
+#define TO_LOOP ucs2_internal_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_ucs2_internal
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
#if __BYTE_ORDER == __LITTLE_ENDIAN
- outbuf[actually++] = (wchar_t) bswap_16 (*newinbuf++);
+# define BODY \
+  { \
+    /* Fetch one 16-bit unit and swap its bytes into host order.  */ \
+    uint16_t unit = bswap_16 (*((uint16_t *) inptr)); \
+    \
+    inptr += 2; \
+    /* Widen to one 32-bit internal character.  */ \
+    *((uint32_t *) outptr)++ = unit; \
+  }
#else
- outbuf[actually++] = (wchar_t) *newinbuf++;
+# define BODY \
+  { \
+    /* Fetch one 16-bit unit; no byte swap is needed on this host.  */ \
+    uint16_t unit = *((uint16_t *) inptr); \
+    \
+    inptr += 2; \
+    /* Widen to one 32-bit internal character.  */ \
+    *((uint32_t *) outptr)++ = unit; \
+  }
#endif
- data->outbufavail += 4;
- *inlen -= 2;
- }
-
- /* Remember how much we converted. */
- do_write += actually * sizeof (wchar_t);
-
- if (*inlen == 1)
- {
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
- break;
- }
-
- /* Check whether an illegal character appeared. */
- if (errno != 0)
- {
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
-
- if (data->is_last)
- {
- /* This is the last step. */
- result = (data->outbufavail + sizeof (wchar_t) > data->outbufsize
- ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
- break;
- }
-
- /* Status so far. */
- result = GCONV_EMPTY_INPUT;
-
- if (data->outbufavail > 0)
- {
- /* Call the functions below in the chain. */
- size_t newavail = data->outbufavail;
-
- result = (*fct) (next_step, next_data, data->outbuf, &newavail,
- written, 0);
-
- /* Correct the output buffer. */
- if (newavail != data->outbufavail && newavail > 0)
- {
- memmove (data->outbuf,
- &data->outbuf[data->outbufavail - newavail],
- newavail);
- data->outbufavail = newavail;
- }
- }
- }
- while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
-
- __set_errno (save_errno);
- }
-
- if (written != NULL && data->is_last)
- *written = do_write;
-
- return result;
-}
-
-
-int
-__gconv_transform_internal_ucs2 (struct gconv_step *step,
- struct gconv_step_data *data,
- const char *inbuf, size_t *inlen,
- size_t *written, int do_flush)
-{
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
- gconv_fct fct = next_step->fct;
- size_t do_write;
- int result;
-
- /* If the function is called with no input this means we have to reset
- to the initial state. The possibly partly converted input is
- dropped. */
- if (do_flush)
- {
- /* Clear the state. */
- memset (data->statep, '\0', sizeof (mbstate_t));
- do_write = 0;
-
- /* Call the steps down the chain if there are any. */
- if (data->is_last)
- result = GCONV_OK;
- else
- {
- struct gconv_step *next_step = step + 1;
- struct gconv_step_data *next_data = data + 1;
-
- result = (*fct) (next_step, next_data, NULL, 0, written, 1);
-
- /* Clear output buffer. */
- data->outbufavail = 0;
- }
- }
- else
- {
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
- int save_errno = errno;
- do_write = 0;
-
- do
- {
- uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail];
- size_t actually = 0;
-
- errno = 0;
-
- while (data->outbufavail + 2 <= data->outbufsize
- && *inlen >= 4)
- {
- if (*newinbuf >= 0x10000)
- {
- __set_errno (EILSEQ);
- break;
- }
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
+
+
+/* Convert from the internal (UCS4-like) format to UCS2.
+   The defines below parameterize the loop.c and skeleton.c files
+   included after BODY; their exact meaning is presumably defined
+   there -- confirm against those files. */
+#define DEFINE_INIT 0 /* Zero: presumably no init function is requested. */
+#define DEFINE_FINI 0 /* Zero: presumably no fini function is requested. */
+#define MIN_NEEDED_FROM 4 /* BODY reads one uint32_t per character. */
+#define MIN_NEEDED_TO 2 /* BODY writes one uint16_t per character. */
+#define FROM_DIRECTION 1 /* Constant: only one direction exists for this converter. */
+#define FROM_LOOP internal_ucs2_loop
+#define TO_LOOP internal_ucs2_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_internal_ucs2
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM /* Input-size minimum is the "from" side minimum. */
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO /* Output-size minimum is the "to" side minimum. */
+#define LOOPFCT FROM_LOOP /* The loop built from BODY carries the "from" loop name. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
- /* Please note that we use the `uint32_t' pointer as a
- `uint16_t' pointer which works since we are on a
- little endian machine. */
- outbuf[actually++] = bswap_16 (*((uint16_t *) newinbuf));
- ++newinbuf;
+# define BODY \
+  { \
+    const uint32_t wc = *((uint32_t *) inptr); \
+    \
+    /* Values of 0x10000 and above do not fit into one 16-bit unit.  */ \
+    if (wc >= 0x10000) \
+      { \
+        result = GCONV_ILLEGAL_INPUT; \
+        break; \
+      } \
+    /* WC is below 0x10000 here, so truncating it to 16 bits loses \
+       nothing; the bytes are then swapped for the output.  */ \
+    *((uint16_t *) outptr)++ = bswap_16 ((uint16_t) wc); \
+    inptr += 4; \
+  }
#else
- outbuf[actually++] = *newinbuf++;
+# define BODY \
+  { \
+    const uint32_t wc = *((uint32_t *) inptr); \
+    \
+    /* Values of 0x10000 and above do not fit into one 16-bit unit.  */ \
+    if (wc >= 0x10000) \
+      { \
+        result = GCONV_ILLEGAL_INPUT; \
+        break; \
+      } \
+    /* No byte swap is needed on this host; store the low 16 bits.  */ \
+    *((uint16_t *) outptr)++ = wc; \
+    inptr += 4; \
+  }
#endif
- *inlen -= 4;
- data->outbufavail += 2;
- }
-
- /* Remember how much we converted. */
- do_write += (const char *) newinbuf - inbuf;
-
- if (*inlen > 0 && *inlen < 4)
- {
- /* We have an incomplete input character. */
- result = GCONV_INCOMPLETE_INPUT;
- break;
- }
-
- /* Check whether an illegal character appeared. */
- if (errno != 0)
- {
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
-
- if (data->is_last)
- {
- /* This is the last step. */
- result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
- break;
- }
-
- /* Status so far. */
- result = GCONV_EMPTY_INPUT;
-
- if (data->outbufavail > 0)
- {
- /* Call the functions below in the chain. */
- size_t newavail = data->outbufavail;
-
- result = (*fct) (next_step, next_data, data->outbuf, &newavail,
- written, 0);
-
- /* Correct the output buffer. */
- if (newavail != data->outbufavail && newavail > 0)
- {
- memmove (data->outbuf,
- &data->outbuf[data->outbufavail - newavail],
- newavail);
- data->outbufavail = newavail;
- }
- }
- }
- while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
-
- __set_errno (save_errno);
- }
-
- if (written != NULL && data->is_last)
- *written = do_write / sizeof (wchar_t);
-
- return result;
-}
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>