aboutsummaryrefslogtreecommitdiff
path: root/wcsmbs
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1998-04-07 09:21:28 +0000
committerUlrich Drepper <drepper@redhat.com>1998-04-07 09:21:28 +0000
commit4bca4c174852bfc348f99e85684fc8f65631d125 (patch)
treea5779cb83844d32d94557e4b8613278d7d877c9b /wcsmbs
parent91641c65220646ae2ad8b4ec6972adc7e6720d36 (diff)
downloadglibc-4bca4c174852bfc348f99e85684fc8f65631d125.zip
glibc-4bca4c174852bfc348f99e85684fc8f65631d125.tar.gz
glibc-4bca4c174852bfc348f99e85684fc8f65631d125.tar.bz2
Update.
1998-04-07 08:51 Ulrich Drepper <drepper@cygnus.com> * iconv/gconv.c: Take care for NULL arguments. * iconv/gconv_dl.c: Make find_func function global. * iconv/gconv_int.h: Add prototype for __gconv_find_func. Add prototypes for ASCII conversion functions. * iconv/gconv_simple.c: Add ASCII conversion functions. * locale/C-ctype.c: Correct charset name. * wcsmbs/Makefile (distribute): Add wcsmbsload.h. (routines): Remove wmemrtowcs and wmemrtombs, add wcsnlen and wcsmbsload. * wcsmbs/btowc.c: Rewrite to use iconv functionality. * wcsmbs/mbrtowc.c: Likewise. * wcsmbs/mbsnrtowcs.c: Likewise. * wcsmbs/mbsrtowcs.c: Likewise. * wcsmbs/wcrtomb.c: Likewise. * wcsmbs/wcsnrtombs.c: Likewise. * wcsmbs/wcsrtombs.c: Likewise. * wcsmbs/wctob.c: Likewise. * wcsmbs/wchar.h: Add prototype for __wcslen, wcsnlen. Remove prototypes for wmemr*. * wcsmbs/wcslen.c: Rename to __wcslen and make wcslen weak alias. * wcsmbs/wcsnlen.c: New file. * wcsmbs/wcsmbsload.c: New file. * wcsmbs/wcsmbsload.h: New file. * manual/filesys.texi: Mention risks of tmpnam and mktemp. * manual/install.texi: Describe some more critical points. * string/string.h: Add prototype for __strnlen. * string/strnlen.c: Rename to __strnlen and make strnlen weak alias. * sysdeps/posix/mktemp.c: Rewrite to allow many more files and much less predictable names. * sysdeps/posix/mkstemp.c: Likewise. 1998-04-05 Andreas Jaeger <aj@arthur.rhein-neckar.de> * manual/libc.texinfo (Top): Change "file namespace" to "local namespace". * manual/socket.texi: Change file namespace to local namespace. (Out-of-Band Data): Remove unneeded variable link. (Host Address Functions): Use uint32_t consequently and add a number of clarifications for IPv4/IPv6, classless addresses. (Internet Namespace): Added some paragraphs about IPv6. Based on suggestions by Francesco Potorti` <F.Potorti@cnuce.cnr.it>. 
1998-04-05 Philip Blundell <Philip.Blundell@pobox.com> Update for draft-ietf-ipngwg-bsd-api-new-01.txt: * resolv/netdb.h (getnameinfo): Use `socklen_t' not `size_t'. (NI_NUMERICHOST, et al.): Tidy up and add comments. (AI_NUMERICHOST): Define. (getnodebyname): New prototype. (AI_V4MAPPED, et al.): New constants. * sysdeps/unix/bsd/bsd4.4/bits/sockaddr.h (SA_LEN): New macro.< * sysdeps/generic/bits/sockaddr.h (SA_LEN): Likewise. * sysdeps/unix/sysv/linux/Makefile (sysdep_routines): Add sa_len for socket. * sysdeps/unix/sysv/linux/sa_len.c: New file. * sysdeps/unix/sysv/linux/Dist: Add sa_len.c. * sysdeps/unix/sysv/linux/bits/socket.h: Make multiple #inclusion safe. * sysdeps/generic/bits/sockunion.h: New file, defining sockaddr_union. * sysdeps/unix/sysv/linux/bits/sockunion.h: Likewise. * socket/Makefile (headers): Add bits/sockunion.h. * socket/sys/socket.h: Include <bits/sockunion.h> * inet/netinet/in.h: Include <bits/sockaddr.h> rather than <sys/socket.h>, to avoid getting sockaddr_union defined. * sysdeps/generic/bits/socket.h: Allow inclusion from netinet.in.h. * sysdeps/unix/sysv/linux/bits/socket.h: Likewise. * sysdeps/unix/sysv/linux/mips/bits/socket.h: Likewise. * sysdeps/unix/sysv/linux/if_index.c: Remove use of SIOCGIFCOUNT (2.2.x kernels won't have it). 1998-04-06 21:21 Ulrich Drepper <drepper@cygnus.com> * sysdeps/mach/hurd/bits/ioctls.h: Allow inclusion from hurd/ioctl.h. * sysdeps/mach/hurd/dl-sysdep.c: Use __ptr_t not caddr_t. * iconv/gconv_conf.c: Define MAXPATHLEN if not available before. Patches by UCHIYAMA Yasushi <uch@nop.or.jp>. 1998-04-05 Philip Blundell <Philip.Blundell@pobox.com> * manual/socket.texi (Socket Addresses): Fix a typo. (Interface Naming): New section. 1998-04-05 23:29 Zack Weinberg <zack@rabi.phys.columbia.edu> * configure.in: Check for CC using $ac_tool_prefix, to handle cross-compilation. Use AC_CHECK_TOOL to find MiG. * config.make.in: Add MIG to be substituted. * mach/Machrules: Don't define MIG variable. 
Patch by Gordon Matzigkeit <gord@profitpress.com>. 1998-04-05 Mark Kettenis <kettenis@phys.uva.nl> * elf/dl-load.c (decompose_rpath): Use local_strdup instead of strdupa to copy rpath. 1998-04-05 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * configure.in: Fix gcc version check. * aclocal.m4 (AC_PROG_CHECK_VER): Rewritten to make less confusing. 1998-04-05 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * configure.in: Fix gcc version check. * aclocal.m4 (AC_PROG_CHECK_VER): Rewritten to make less confusing. 1998-04-05 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * configure.in: Fix gcc version check. * aclocal.m4 (AC_PROG_CHECK_VER): Rewritten to make less confusing. 1998-03-31 Mark Kettenis <kettenis@phys.uva.nl> * sysdeps/unix/bsd/unlockpt.c (unlockpt): Call __ptsname_r instead of ptsname_r. * stdlib/stdlib.h: Change prototype of ptsname_r to make it more like ttyname_r. * sysdeps/unix/sysv/linux/ptsname.c (__ptsname_r): Likewise. * sysdeps/generic/ptsname.c (__ptsname_r): Likewise. * sysdeps/unix/grantpt.c (grantpt): Change check of return value of __ptsname_r accordingly. * login/openpty.c (openpty): Likewise. 1998-06-04 18:47 H.J. Lu <hjl@gnu.org> * libio/fileops.c (_IO_file_xsgetn): Adjust pointers. 1998-04-06 13:58 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * Makeconfig (libtypes): Redo H.J. Lu's change of 1998-03-27. * Makerules: Readd missing rule for $(objpfx)stamp.oS. 1998-04-06 Thorsten Kukuk <kukuk@vt.uni-paderborn.de> * nis/ypclnt.c: Don't give YPERR_RPC back, use the do_ypcall return value for better error checking.
Diffstat (limited to 'wcsmbs')
-rw-r--r--wcsmbs/Makefile9
-rw-r--r--wcsmbs/btowc.c46
-rw-r--r--wcsmbs/mbrtowc.c144
-rw-r--r--wcsmbs/mbsnrtowcs.c193
-rw-r--r--wcsmbs/mbsrtowcs.c172
-rw-r--r--wcsmbs/wchar.h29
-rw-r--r--wcsmbs/wcrtomb.c109
-rw-r--r--wcsmbs/wcslen.c5
-rw-r--r--wcsmbs/wcsmbsload.c132
-rw-r--r--wcsmbs/wcsmbsload.h52
-rw-r--r--wcsmbs/wcsnlen.c44
-rw-r--r--wcsmbs/wcsnrtombs.c155
-rw-r--r--wcsmbs/wcsrtombs.c157
-rw-r--r--wcsmbs/wctob.c41
14 files changed, 742 insertions, 546 deletions
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile
index 5dd46da..a601489 100644
--- a/wcsmbs/Makefile
+++ b/wcsmbs/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+# Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@@ -22,21 +22,22 @@
subdir := wcsmbs
headers := wchar.h
-distribute := wcwidth.h
+distribute := wcwidth.h wcsmbsload.h
routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \
wcsncmp wcsncpy wcspbrk wcsrchr wcsspn wcstok wcsstr wmemchr \
wmemcmp wmemcpy wmemmove wmemset wcpcpy wcpncpy \
btowc wctob mbsinit \
mbrlen mbrtowc wcrtomb mbsrtowcs wcsrtombs \
- mbsnrtowcs wcsnrtombs wmemrtowcs wmemrtombs \
+ mbsnrtowcs wcsnrtombs wcsnlen \
wcstol wcstoul wcstoll wcstoull wcstod wcstold wcstof \
wcstol_l wcstoul_l wcstoll_l wcstoull_l \
wcstod_l wcstold_l wcstof_l \
wcscoll wcsxfrm \
wcwidth wcswidth \
wcscoll_l wcsxfrm_l \
- wcscasecmp wcsncase wcscasecmp_l wcsncase_l
+ wcscasecmp wcsncase wcscasecmp_l wcsncase_l \
+ wcsmbsload
tests := tst-wcstof
diff --git a/wcsmbs/btowc.c b/wcsmbs/btowc.c
index 4c2f9df..268b1f2 100644
--- a/wcsmbs/btowc.c
+++ b/wcsmbs/btowc.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
@@ -17,18 +17,52 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <gconv.h>
#include <stdio.h>
+#include <string.h>
#include <wchar.h>
+#include <wcsmbsload.h>
-/* We use UTF8 encoding for multibyte strings and therefore a valid
- one byte multibyte string only can have a value from 0 to 0x7f. */
wint_t
btowc (c)
int c;
{
- if (WEOF != (wint_t) EOF || c < 0 || c > 0x7f)
+ char buf[sizeof (wchar_t)];
+ struct gconv_step_data data;
+ char inbuf[1];
+ size_t inbytes;
+ size_t converted;
+ int status;
+
+ /* If the parameter does not fit into one byte or it is the EOF value
+ we can give the answer now. */
+ if (c < -128 || c > 127 || c == EOF)
+ return WEOF;
+
+ /* Tell where we want the result. */
+ data.outbuf = (char *) buf;
+ data.outbufavail = 0;
+ data.outbufsize = sizeof (wchar_t);
+ data.is_last = 1;
+ data.statep = &data.__state;
+
+ /* Make sure we start in the initial state. */
+ memset (&data.__state, '\0', sizeof (mbstate_t));
+
+ /* Make sure we use the correct function. */
+ update_conversion_ptrs ();
+
+ /* Create the input string. */
+ inbuf[0] = c;
+ inbytes = 1;
+
+ status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc,
+ &data, inbuf, &inbytes,
+ &converted, 0);
+ /* The conversion failed. */
+ if (status != GCONV_OK && status != GCONV_FULL_OUTPUT)
return WEOF;
- else
- return (wint_t) c;
+
+ return *(wchar_t *)buf;
}
diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c
index cf0bbd6..99bc5a4 100644
--- a/wcsmbs/mbrtowc.c
+++ b/wcsmbs/mbrtowc.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -18,113 +18,77 @@
Boston, MA 02111-1307, USA. */
#include <errno.h>
+#include <gconv.h>
#include <wchar.h>
+#include <wcsmbsload.h>
+
+#include <assert.h>
#ifndef EILSEQ
-#define EILSEQ EINVAL
+# define EILSEQ EINVAL
#endif
-static mbstate_t internal;
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
size_t
__mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
- size_t used = 0;
-
- if (ps == NULL)
- ps = &internal;
-
+ wchar_t buf[1];
+ struct gconv_step_data data;
+ size_t inbytes;
+ int status;
+ size_t result;
+
+ /* Tell where we want the result. */
+ data.outbuf = (char *) (pwc ?: buf);
+ data.outbufavail = 0;
+ data.outbufsize = sizeof (wchar_t);
+ data.is_last = 1;
+ data.statep = ps ?: &state;
+
+ /* A first special case is if S is NULL. This means put PS in the
+ initial state. */
if (s == NULL)
{
- /* See first paragraph of description in 7.16.6.3.2. */
- ps->count = 0;
- return 0;
+ data.outbuf = (char *) buf;
+ s = "";
+ n = 1;
}
- if (n > 0)
- {
- if (ps->count == 0)
- {
- unsigned char byte = (unsigned char) *s++;
- ++used;
-
- /* We must look for a possible first byte of a UTF8 sequence. */
- if (byte < 0x80)
- {
- /* One byte sequence. */
- if (pwc != NULL)
- *pwc = (wchar_t) byte;
- return byte ? used : 0;
- }
-
- if ((byte & 0xc0) == 0x80 || (byte & 0xfe) == 0xfe)
- {
- /* Oh, oh. An encoding error. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
-
- if ((byte & 0xe0) == 0xc0)
- {
- /* We expect two bytes. */
- ps->count = 1;
- ps->value = byte & 0x1f;
- }
- else if ((byte & 0xf0) == 0xe0)
- {
- /* We expect three bytes. */
- ps->count = 2;
- ps->value = byte & 0x0f;
- }
- else if ((byte & 0xf8) == 0xf0)
- {
- /* We expect four bytes. */
- ps->count = 3;
- ps->value = byte & 0x07;
- }
- else if ((byte & 0xfc) == 0xf8)
- {
- /* We expect five bytes. */
- ps->count = 4;
- ps->value = byte & 0x03;
- }
- else
- {
- /* We expect six bytes. */
- ps->count = 5;
- ps->value = byte & 0x01;
- }
- }
+ /* Make sure we use the correct function. */
+ update_conversion_ptrs ();
- /* We know we have to handle a multibyte character and there are
- some more bytes to read. */
- while (used < n)
+ /* Do a normal conversion. */
+ inbytes = n;
+ status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc,
+ &data, s, &inbytes, NULL, 0);
+
+ /* There must not be any problems with the conversion but illegal input
+ characters. The output buffer must be large enough, otherwise the
+ definition of MB_CUR_MAX is not correct. All the other possible
+ errors also must not happen. */
+ assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT
+ || status == GCONV_INCOMPLETE_INPUT);
+
+ if (status == GCONV_OK)
+ {
+ if (*(wchar_t *)data.outbuf == L'\0')
{
- /* The second to sixths byte must be of the form 10xxxxxx. */
- unsigned char byte = (unsigned char) *s++;
- ++used;
-
- if ((byte & 0xc0) != 0x80)
- {
- /* Oh, oh. An encoding error. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
-
- ps->value <<= 6;
- ps->value |= byte & 0x3f;
-
- if (--ps->count == 0)
- {
- /* The character is finished. */
- if (pwc != NULL)
- *pwc = (wchar_t) ps->value;
- return ps->value ? used : 0;
- }
+ /* The converted character is the NUL character. */
+ assert (mbsinit (data.statep));
+ result = 0;
}
+ else
+ result = n - inbytes;
+ }
+ else if (status == GCONV_INCOMPLETE_INPUT)
+ /* The input ends in the middle of a character. This is not an
+ encoding error, so errno must be left untouched. */
+ result = (size_t) -2;
+ else
+ {
+ /* Illegal input sequence: report an encoding error. */
+ result = (size_t) -1;
+ __set_errno (EILSEQ);
+ }
- return (size_t) -2;
+ return result;
}
weak_alias (__mbrtowc, mbrtowc)
diff --git a/wcsmbs/mbsnrtowcs.c b/wcsmbs/mbsnrtowcs.c
index db67d5c..ded15e4 100644
--- a/wcsmbs/mbsnrtowcs.c
+++ b/wcsmbs/mbsnrtowcs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -18,16 +18,20 @@
Boston, MA 02111-1307, USA. */
#include <errno.h>
+#include <gconv.h>
+#include <string.h>
#include <wchar.h>
+#include <wcsmbsload.h>
+
+#include <assert.h>
#ifndef EILSEQ
-#define EILSEQ EINVAL
+# define EILSEQ EINVAL
#endif
-/* We don't need the state really because we don't have shift states
- to maintain between calls to this function. */
-static mbstate_t internal;
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
/* This is a non-standard function but it is very useful in the
implementation of stdio because we have to deal with unterminated
@@ -40,128 +44,89 @@ __mbsnrtowcs (dst, src, nmc, len, ps)
size_t len;
mbstate_t *ps;
{
- size_t written = 0;
- const char *run = *src;
- const char *last = run + nmc;
- wchar_t value;
- size_t count;
+ size_t inbytes_in;
+ struct gconv_step_data data;
+ size_t result = 0;
+ int status;
- if (ps == NULL)
- ps = &internal;
+ /* Tell where we want the result. */
+ data.is_last = 1;
+ data.statep = ps ?: &state;
- /* Get information from last use of this state. */
- count = ps->count;
- value = ps->value;
+ if (nmc == 0)
+ return 0;
+ inbytes_in = __strnlen (*src, nmc - 1) + 1;
- if (dst == NULL)
- /* The LEN parameter has to be ignored if we don't actually write
- anything. */
- len = ~0;
+ /* Make sure we use the correct function. */
+ update_conversion_ptrs ();
- /* Copy all words. */
- while (written < len && run < last)
+ /* We have to handle DST == NULL special. */
+ if (dst == NULL)
{
- unsigned char byte;
-
- /* Store address of next byte to process. */
- *src = run;
-
- /* Start reading a new character only if we are in the initial
- state. */
- if (count == 0)
- {
- byte = *run++;
-
- /* We expect a start of a new multibyte character. */
- if (byte < 0x80)
- {
- /* One byte sequence. */
- count = 0;
- value = byte;
- }
- else if ((byte & 0xe0) == 0xc0)
- {
- count = 1;
- value = byte & 0x1f;
- }
- else if ((byte & 0xf0) == 0xe0)
- {
- /* We expect three bytes. */
- count = 2;
- value = byte & 0x0f;
- }
- else if ((byte & 0xf8) == 0xf0)
- {
- /* We expect four bytes. */
- count = 3;
- value = byte & 0x07;
- }
- else if ((byte & 0xfc) == 0xf8)
- {
- /* We expect five bytes. */
- count = 4;
- value = byte & 0x03;
- }
- else if ((byte & 0xfe) == 0xfc)
- {
- /* We expect six bytes. */
- count = 5;
- value = byte & 0x01;
- }
- else
- {
- /* This is an illegal encoding. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
- }
-
- /* Read the possible remaining bytes. */
- while (run < last && count > 0)
- {
- byte = *run++;
- --count;
-
- if ((byte & 0xc0) != 0x80)
- {
- /* This is an illegal encoding. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
-
- value <<= 6;
- value |= byte & 0x3f;
- }
-
- /* If this character is only partially available remember this. */
- if (run == last && count != 0)
+ wchar_t buf[64]; /* Just an arbitrary size. */
+ size_t inbytes = inbytes_in;
+ const char *inbuf = *src;
+ size_t written;
+
+ data.outbuf = (char *) buf;
+ data.outbufsize = sizeof (buf);
+ do
{
- ps->count = count;
- ps->value = value;
- break;
+ inbuf += inbytes_in - inbytes;
+ inbytes_in = inbytes;
+ data.outbufavail = 0;
+ written = 0;
+
+ status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc,
+ &data, inbuf, &inbytes,
+ &written, 0);
+ result += written;
}
+ while (status == GCONV_FULL_OUTPUT);
- /* Store value is required. */
- if (dst != NULL)
- *dst++ = value;
-
- /* The whole sequence is read. Check whether end of string is
- reached. */
- if (value == L'\0')
+ /* DST is NULL in this branch, so the converted characters live in
+ the local BUF; look there to see whether the last character
+ converted is the terminating NUL. */
+ if (status == GCONV_OK && buf[written - 1] == L'\0')
+ /* Don't count the NUL character in. */
+ --result;
+ }
+ else
+ {
+ /* This code is based on the safe assumption that all internal
+ multi-byte encodings use the NUL byte only to mark the end
+ of the string. */
+ size_t inbytes = inbytes_in;
+
+ data.outbuf = (char *) dst;
+ data.outbufsize = len * sizeof (wchar_t);
+ data.outbufavail = 0;
+
+ status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc,
+ &data, *src, &inbytes,
+ &result, 0);
+
+ /* We have to determine whether the last character converted
+ is the NUL character. */
+ if (status == GCONV_OK && ((wchar_t *) dst)[result - 1] == L'\0')
{
- /* Found the end of the string. */
+ assert (result > 0);
+ assert (mbsinit (data.statep));
*src = NULL;
- ps->count = 0;
- return written;
+ --result;
}
-
- /* Increment counter of produced words. */
- ++written;
+ else
+ *src += inbytes_in - inbytes;
}
- /* Store address of next byte to process. */
- *src = run;
+ /* There must not be any problems with the conversion but illegal input
+ characters. */
+ assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT
+ || status == GCONV_INCOMPLETE_INPUT || status == GCONV_FULL_OUTPUT);
+
+ if (status != GCONV_OK && status != GCONV_FULL_OUTPUT)
+ {
+ result = (size_t) -1;
+ __set_errno (EILSEQ);
+ }
- return written;
+ return result;
}
weak_alias (__mbsnrtowcs, mbsnrtowcs)
diff --git a/wcsmbs/mbsrtowcs.c b/wcsmbs/mbsrtowcs.c
index 84d4cbf..d0af40f 100644
--- a/wcsmbs/mbsrtowcs.c
+++ b/wcsmbs/mbsrtowcs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -18,16 +18,20 @@
Boston, MA 02111-1307, USA. */
#include <errno.h>
+#include <gconv.h>
+#include <string.h>
#include <wchar.h>
+#include <wcsmbsload.h>
+
+#include <assert.h>
#ifndef EILSEQ
-#define EILSEQ EINVAL
+# define EILSEQ EINVAL
#endif
-/* We don't need the state really because we don't have shift states
- to maintain between calls to this function. */
-static mbstate_t internal;
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
size_t
__mbsrtowcs (dst, src, len, ps)
@@ -36,108 +40,86 @@ __mbsrtowcs (dst, src, len, ps)
size_t len;
mbstate_t *ps;
{
- size_t written = 0;
- const char *run = *src;
+ struct gconv_step_data data;
+ size_t result = 0;
+ int status;
- if (ps == NULL)
- ps = &internal;
+ /* Tell where we want the result. */
+ data.is_last = 1;
+ data.statep = ps ?: &state;
- if (dst == NULL)
- /* The LEN parameter has to be ignored if we don't actually write
- anything. */
- len = ~0;
+ /* Make sure we use the correct function. */
+ update_conversion_ptrs ();
- /* Copy all words. */
- while (written < len)
+ /* We have to handle DST == NULL special. */
+ if (dst == NULL)
{
- wchar_t value;
- size_t count;
- unsigned char byte;
-
- /* Store address of next byte to process. */
- *src = run;
-
- byte = *run++;
-
- /* We expect a start of a new multibyte character. */
- if (byte < 0x80)
- {
- /* One byte sequence. */
- count = 0;
- value = byte;
- }
- else if ((byte & 0xe0) == 0xc0)
- {
- count = 1;
- value = byte & 0x1f;
- }
- else if ((byte & 0xf0) == 0xe0)
- {
- /* We expect three bytes. */
- count = 2;
- value = byte & 0x0f;
- }
- else if ((byte & 0xf8) == 0xf0)
- {
- /* We expect four bytes. */
- count = 3;
- value = byte & 0x07;
- }
- else if ((byte & 0xfc) == 0xf8)
- {
- /* We expect five bytes. */
- count = 4;
- value = byte & 0x03;
- }
- else if ((byte & 0xfe) == 0xfc)
+ wchar_t buf[64]; /* Just an arbitrary size. */
+ size_t inbytes_in = strlen (*src) + 1;
+ size_t inbytes = inbytes_in;
+ const char *inbuf = *src;
+ size_t written;
+
+ data.outbuf = (char *) buf;
+ data.outbufsize = sizeof (buf);
+ do
{
- /* We expect six bytes. */
- count = 5;
- value = byte & 0x01;
- }
- else
- {
- /* This is an illegal encoding. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
-
- /* Read the possible remaining bytes. */
- while (count-- > 0)
- {
- byte = *run++;
-
- if ((byte & 0xc0) != 0x80)
- {
- /* This is an illegal encoding. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
-
- value <<= 6;
- value |= byte & 0x3f;
+ inbuf += inbytes_in - inbytes;
+ inbytes_in = inbytes;
+ data.outbufavail = 0;
+ written = 0;
+
+ status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc,
+ &data, inbuf, &inbytes,
+ &written, 0);
+ result += written;
}
+ while (status == GCONV_FULL_OUTPUT);
- /* Store value is required. */
- if (dst != NULL)
- *dst++ = value;
-
- /* The whole sequence is read. Check whether end of string is
- reached. */
- if (value == L'\0')
+ /* DST is NULL in this branch, so the converted characters live in
+ the local BUF; look there to see whether the last character
+ converted is the terminating NUL. */
+ if (status == GCONV_OK && buf[written - 1] == L'\0')
+ /* Don't count the NUL character in. */
+ --result;
+ }
+ else
+ {
+ /* This code is based on the safe assumption that all internal
+ multi-byte encodings use the NUL byte only to mark the end
+ of the string. */
+ size_t inbytes_in = __strnlen (*src, len * MB_CUR_MAX) + 1;
+ size_t inbytes = inbytes_in;
+
+ data.outbuf = (char *) dst;
+ data.outbufsize = len * sizeof (wchar_t);
+ data.outbufavail = 0;
+
+ status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc,
+ &data, *src, &inbytes,
+ &result, 0);
+
+ /* We have to determine whether the last character converted
+ is the NUL character. */
+ if (status == GCONV_OK && ((wchar_t *) dst)[result - 1] == L'\0')
{
- /* Found the end of the string. */
+ assert (result > 0);
+ assert (mbsinit (data.statep));
*src = NULL;
- return written;
+ --result;
}
-
- /* Increment counter of produced words. */
- ++written;
+ else
+ *src += inbytes_in - inbytes;
}
- /* Store address of next byte to process. */
- *src = run;
+ /* There must not be any problems with the conversion but illegal input
+ characters. */
+ assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT
+ || status == GCONV_INCOMPLETE_INPUT || status == GCONV_FULL_OUTPUT);
+
+ if (status != GCONV_OK && status != GCONV_FULL_OUTPUT)
+ {
+ result = (size_t) -1;
+ __set_errno (EILSEQ);
+ }
- return written;
+ return result;
}
weak_alias (__mbsrtowcs, mbsrtowcs)
diff --git a/wcsmbs/wchar.h b/wcsmbs/wchar.h
index 4bba959..719de26 100644
--- a/wcsmbs/wchar.h
+++ b/wcsmbs/wchar.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -166,8 +166,15 @@ extern wchar_t *wcstok __P ((wchar_t *__restrict __s,
wchar_t **__restrict __ptr));
/* Return the number of wide characters in S. */
+extern size_t __wcslen __P ((__const wchar_t *__s));
extern size_t wcslen __P ((__const wchar_t *__s));
+#ifdef __USE_GNU
+/* Return the number of wide characters in S, but at most MAXLEN. */
+extern size_t __wcsnlen __P ((__const wchar_t *__s, size_t __maxlen));
+extern size_t wcsnlen __P ((__const wchar_t *__s, size_t __maxlen));
+#endif
+
/* Search N wide characters of S for C. */
extern wchar_t *wmemchr __P ((__const wchar_t *__s, wchar_t __c, size_t __n));
@@ -260,16 +267,6 @@ extern size_t mbsnrtowcs __P ((wchar_t *__restrict __dst,
__const char **__restrict __src, size_t __nmc,
size_t __len, mbstate_t *__restrict __ps));
-/* Similar function to the above but this does not stop at NUL bytes. */
-extern size_t __wmemrtowcs __P ((wchar_t *__restrict __dst,
- __const char **__restrict __src,
- size_t __nmc, size_t __len,
- mbstate_t *__restrict __ps));
-extern size_t wmemrtowcs __P ((wchar_t *__restrict __dst,
- __const char **__restrict __src,
- size_t __nmc, size_t __len,
- mbstate_t *__restrict __ps));
-
/* Write multibyte character representation of at most NWC characters
from the wide character string SRC to DST. */
extern size_t __wcsnrtombs __P ((char *__restrict __dst,
@@ -280,16 +277,6 @@ extern size_t wcsnrtombs __P ((char *__restrict __dst,
__const wchar_t **__restrict __src,
size_t __nwc, size_t __len,
mbstate_t *__restrict __ps));
-
-/* Similar function to the above but this does not stop at NUL bytes. */
-extern size_t __wmemrtombs __P ((char *__restrict __dst,
- __const wchar_t **__restrict __src,
- size_t __nwc, size_t len,
- mbstate_t *__restrict __ps));
-extern size_t wmemrtombs __P ((char *__restrict __dst,
- __const wchar_t **__restrict __src,
- size_t __nwc, size_t len,
- mbstate_t *__restrict __ps));
#endif /* use GNU */
diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c
index 63859e1..d45ae44 100644
--- a/wcsmbs/wcrtomb.c
+++ b/wcsmbs/wcrtomb.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -18,77 +18,86 @@
Boston, MA 02111-1307, USA. */
#include <errno.h>
+#include <gconv.h>
+#include <stdlib.h>
#include <wchar.h>
+#include <wcsmbsload.h>
+
+#include <assert.h>
#ifndef EILSEQ
-#define EILSEQ EINVAL
+# define EILSEQ EINVAL
#endif
-static const wchar_t encoding_mask[] =
-{
- ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
-};
-
-static const unsigned char encoding_byte[] =
-{
- 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
-};
-/* The state is for this UTF8 encoding not used. */
-static mbstate_t internal;
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
size_t
__wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
{
- size_t written = 0;
-
- if (ps == NULL)
- ps = &internal;
-
+ char buf[MB_CUR_MAX];
+ struct gconv_step_data data;
+ int status;
+ size_t result;
+
+ /* Tell where we want the result. */
+ data.outbuf = s;
+ data.outbufavail = 0;
+ data.outbufsize = MB_CUR_MAX;
+ data.is_last = 1;
+ data.statep = ps ?: &state;
+
+ /* A first special case is if S is NULL. This means put PS in the
+ initial state. */
if (s == NULL)
{
- /* This is equivalent to wcrtomb (<<internal>, L'\0', ps). We
- only have to reset the state. */
- ps->count = 0;
- return 1;
+ data.outbuf = buf;
+ wc = L'\0';
}
- /* Store the UTF8 representation of WC. */
- if (wc < 0 || wc > 0x7fffffff)
+ /* Make sure we use the correct function. */
+ update_conversion_ptrs ();
+
+ /* If WC is the NUL character we write into the output buffer the byte
+ sequence necessary for PS to get into the initial state, followed
+ by a NUL byte. */
+ if (wc == L'\0')
{
- /* This is no correct ISO 10646 character. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
+ size_t inbytes = 0;
+
+ status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb,
+ &data, NULL, &inbytes,
+ NULL, 1);
- if (wc < 0x80)
+ if (status == GCONV_OK)
+ data.outbuf[data.outbufavail++] = '\0';
+ }
+ else
{
- /* It's a one byte sequence. */
- if (s != NULL)
- *s = (char) wc;
- ps->count = 0;
- return 1;
+ /* Do a normal conversion. */
+ size_t inbytes = sizeof (wchar_t);
+
+ status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb,
+ &data, (char *) &wc, &inbytes,
+ NULL, 0);
}
- for (written = 2; written < 6; ++written)
- if ((wc & encoding_mask[written - 2]) == 0)
- break;
+ /* There must not be any problems with the conversion but illegal input
+ characters. The output buffer must be large enough, otherwise the
+ definition of MB_CUR_MAX is not correct. All the other possible
+ errors also must not happen. */
+ assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT
+ || status == GCONV_INCOMPLETE_INPUT);
- if (s != NULL)
+ if (status == GCONV_OK)
+ result = data.outbufavail;
+ else
{
- size_t cnt = written;
- s[0] = encoding_byte[cnt - 2];
-
- --cnt;
- do
- {
- s[cnt] = 0x80 | (wc & 0x3f);
- wc >>= 6;
- }
- while (--cnt > 0);
- s[0] |= wc;
+ result = (size_t) -1;
+ __set_errno (EILSEQ);
}
- return written;
+ return result;
}
weak_alias (__wcrtomb, wcrtomb)
diff --git a/wcsmbs/wcslen.c b/wcsmbs/wcslen.c
index 113bc2a..b0f1d29 100644
--- a/wcsmbs/wcslen.c
+++ b/wcsmbs/wcslen.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
@@ -22,7 +22,7 @@
/* Return the number of wide characters in S. */
size_t
-wcslen (s)
+__wcslen (s)
const wchar_t *s;
{
size_t len = 0;
@@ -40,3 +40,4 @@ wcslen (s)
return len;
}
+weak_alias (__wcslen, wcslen)
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
new file mode 100644
index 0000000..cf854d9
--- /dev/null
+++ b/wcsmbs/wcsmbsload.c
@@ -0,0 +1,132 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <langinfo.h>
+#include <limits.h>
+
+#include <wcsmbsload.h>
+#include <bits/libc-lock.h>
+#include <iconv/gconv_int.h>
+
+
+/* Last loaded locale for LC_CTYPE. We initialize for the C locale
+ which is enabled at startup. */
+extern const struct locale_data _nl_C_LC_CTYPE;
+const struct locale_data *__wcsmbs_last_locale = &_nl_C_LC_CTYPE;
+
+
+/* These are the descriptions for the default conversion functions. */
+static struct gconv_step to_wc =
+{
+ shlib_handle: NULL,
+ modname: NULL,
+ counter: INT_MAX,
+ from_name: "ANSI_X3.4-1968",
+ to_name: "ISO-10646/UCS4/",
+ fct: __gconv_transform_ascii_ucs4,
+ init_fct: NULL,
+ end_fct: NULL,
+ data: NULL
+};
+
+static struct gconv_step to_mb =
+{
+ shlib_handle: NULL,
+ modname: NULL,
+ counter: INT_MAX,
+ from_name: "ISO-10646/UCS4/",
+ to_name: "ANSI_X3.4-1968",
+ fct: __gconv_transform_ucs4_ascii,
+ init_fct: NULL,
+ end_fct: NULL,
+ data: NULL
+};
+
+
+/* For the default locale we only have to handle ANSI_X3.4-1968. */
+struct gconv_fcts __wcsmbs_gconv_fcts =
+{
+ towc: &to_wc,
+ tomb: &to_mb
+};
+
+
+static inline struct gconv_step *
+getfct (const char *to, const char *from)
+{
+ size_t nsteps;
+ struct gconv_step *result;
+
+ if (__gconv_find_transform (to, from, &result, &nsteps) != GCONV_OK)
+ /* Loading the conversion step is not possible. */
+ return NULL;
+
+ /* We must only have one step in this conversion. */
+ if (nsteps != 1)
+ return NULL;
+
+ return result;
+}
+
+
+/* Load conversion functions for the currently selected locale. */
+void
+__wcsmbs_load_conv (const struct locale_data *new_category)
+{
+ /* We must modify global data. */
+ __libc_lock_define_initialized (static, lock)
+
+ /* Acquire the lock. */
+ __libc_lock_lock (lock);
+
+ /* We should repeat the test since some other thread might have run
+ this function while we waited for the lock. */
+ if (__wcsmbs_last_locale != new_category)
+ {
+ if (new_category->name == _nl_C_name) /* Yes, pointer comparison. */
+ {
+ failed:
+ __wcsmbs_gconv_fcts.towc = &to_wc;
+ __wcsmbs_gconv_fcts.tomb = &to_mb;
+ }
+ else
+ {
+ /* We must find the real functions. */
+ const char *charset_name;
+
+ /* Get name of charset of the locale. */
+ charset_name = new_category->values[_NL_ITEM_INDEX(CODESET)].string;
+
+ __wcsmbs_gconv_fcts.tomb = getfct (charset_name, "ISO-10646/UCS4/");
+ __wcsmbs_gconv_fcts.towc = getfct ("ISO-10646/UCS4/", charset_name);
+
+ /* If any of the conversion functions is not available we don't
+ use any since this would mean we cannot convert back and
+ forth.*/
+ if (__wcsmbs_gconv_fcts.towc == NULL
+ || __wcsmbs_gconv_fcts.tomb == NULL)
+ goto failed;
+ }
+
+ /* Set last-used variable for current locale. */
+ __wcsmbs_last_locale = new_category;
+ }
+
+ __libc_lock_unlock (lock);
+}
diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h
new file mode 100644
index 0000000..df0ba7b
--- /dev/null
+++ b/wcsmbs/wcsmbsload.h
@@ -0,0 +1,52 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <locale.h>
+#include <wchar.h>
+#include <locale/localeinfo.h>
+
+
+/* Contains pointers to the used functions in the `gconv' modules.
+   Each member points to the single conversion step whose `fct' is
+   called by the wide character <-> multibyte functions. */
+struct gconv_fcts
+ {
+ /* Step converting from the locale's charset to ISO-10646/UCS4. */
+ struct gconv_step *towc;
+ /* Step converting from ISO-10646/UCS4 to the locale's charset. */
+ struct gconv_step *tomb;
+ };
+
+/* Set of currently active conversion functions. */
+extern struct gconv_fcts __wcsmbs_gconv_fcts;
+
+
+/* Last loaded locale for LC_CTYPE. */
+extern const struct locale_data *__wcsmbs_last_locale;
+
+
+/* Load conversion functions for the currently selected locale. */
+extern void __wcsmbs_load_conv (const struct locale_data *new_category)
+ internal_function;
+
+
+/* Reload the conversion function pointers if the LC_CTYPE locale now in
+   effect is not the one they were last loaded for.  */
+static inline void
+update_conversion_ptrs (void)
+{
+  /* Nothing to do while the pointers still match the active locale.  */
+  if (__wcsmbs_last_locale == _nl_current_LC_CTYPE)
+    return;
+
+  __wcsmbs_load_conv (_nl_current_LC_CTYPE);
+}
diff --git a/wcsmbs/wcsnlen.c b/wcsmbs/wcsnlen.c
new file mode 100644
index 0000000..5264a66
--- /dev/null
+++ b/wcsmbs/wcsnlen.c
@@ -0,0 +1,44 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <wchar.h>
+
+
+/* Return the number of wide characters in S that precede the
+   terminating L'\0', but at most MAXLEN.  No element at index MAXLEN
+   or beyond is ever read, so S need not be terminated if it holds at
+   least MAXLEN wide characters.  */
+size_t
+__wcsnlen (const wchar_t *s, size_t maxlen)
+{
+  size_t len = 0;
+
+  /* Test the bound before the element so nothing past MAXLEN is read.  */
+  while (len < maxlen && s[len] != L'\0')
+    ++len;
+
+  return len;
+}
+weak_alias (__wcsnlen, wcsnlen)
diff --git a/wcsmbs/wcsnrtombs.c b/wcsmbs/wcsnrtombs.c
index f6c8048..2dd9f7c 100644
--- a/wcsmbs/wcsnrtombs.c
+++ b/wcsmbs/wcsnrtombs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -18,26 +18,19 @@
Boston, MA 02111-1307, USA. */
#include <errno.h>
+#include <gconv.h>
#include <wchar.h>
+#include <wcsmbsload.h>
+
+#include <assert.h>
#ifndef EILSEQ
-#define EILSEQ EINVAL
+# define EILSEQ EINVAL
#endif
-static const wchar_t encoding_mask[] =
-{
- ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
-};
-
-static const unsigned char encoding_byte[] =
-{
- 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
-};
-
-/* We don't need the state really because we don't have shift states
- to maintain between calls to this function. */
-static mbstate_t internal;
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
/* This is a non-standard function but it is very useful in the
implementation of stdio because we have to deal with unterminated
@@ -50,85 +43,91 @@ __wcsnrtombs (dst, src, nwc, len, ps)
size_t len;
mbstate_t *ps;
{
- size_t written = 0;
- const wchar_t *run = *src;
+ struct gconv_step_data data;
+ size_t inbytes_in;
+ int status;
+ size_t result;
- if (ps == NULL)
- ps = &internal;
+ /* Tell where we want the result. */
+ data.is_last = 1;
+ data.statep = ps ?: &state;
- if (dst == NULL)
- /* The LEN parameter has to be ignored if we don't actually write
- anything. */
- len = ~0;
+ if (nwc == 0)
+ return 0;
+ inbytes_in = __wcsnlen (*src, nwc - 1) + 1;
- while (written < len && nwc-- > 0)
- {
- wchar_t wc;
+ /* Make sure we use the correct function. */
+ update_conversion_ptrs ();
- /* Store position of first unprocessed word. */
- *src = run;
+ /* We have to handle DST == NULL specially. */
+ if (dst == NULL)
+ {
+ char buf[256]; /* Just an arbitrary value. */
+ size_t inbytes = inbytes_in;
+ const wchar_t *inbuf = *src;
+ size_t written;
- wc = *run++;
+ data.outbuf = buf;
+ data.outbufsize = sizeof (buf);
- if (wc < 0 || wc > 0x7fffffff)
+ do
{
- /* This is no correct ISO 10646 character. */
- __set_errno (EILSEQ);
- return (size_t) -1;
+ inbuf += (inbytes_in - inbytes) / sizeof (wchar_t);
+ inbytes_in = inbytes;
+ data.outbufavail = 0;
+ written = 0;
+
+ status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb,
+ &data,
+ (const char *) inbuf,
+ &inbytes, &written, 0);
+ result += written;
}
+ while (status == GCONV_FULL_OUTPUT);
- if (wc == L'\0')
+ if (status == GCONV_OK && dst[data.outbufavail - 1] == '\0')
+ /* Don't count the NUL character in. */
+ --result;
+ }
+ else
+ {
+ /* This code is based on the safe assumption that all internal
+ multi-byte encodings use the NUL byte only to mark the end
+ of the string. */
+ size_t inbytes = inbytes_in;
+
+ data.outbuf = dst;
+ data.outbufavail = 0;
+ data.outbufsize = len;
+
+ status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb,
+ &data, (const char *) *src,
+ &inbytes, &result, 0);
+
+ /* We have to determine whether the last character converted
+ is the NUL character. */
+ if (status == GCONV_OK && dst[data.outbufavail - 1] == '\0')
{
- /* Found the end. */
- if (dst != NULL)
- *dst = '\0';
+ assert (data.outbufavail > 0);
+ assert (mbsinit (data.statep));
*src = NULL;
- return written;
- }
- else if (wc < 0x80)
- {
- /* It's an one byte sequence. */
- if (dst != NULL)
- *dst++ = (char) wc;
- ++written;
+ --result;
}
else
- {
- size_t step;
-
- for (step = 2; step < 6; ++step)
- if ((wc & encoding_mask[step - 2]) == 0)
- break;
-
- if (written + step >= len)
- /* Too long. */
- break;
-
- if (dst != NULL)
- {
- size_t cnt = step;
-
- dst[0] = encoding_byte[cnt - 2];
-
- --cnt;
- do
- {
- dst[cnt] = 0x80 | (wc & 0x3f);
- wc >>= 6;
- }
- while (--cnt > 0);
- dst[0] |= wc;
+ *src += result;
+ }
- dst += step;
- }
+ /* There must not be any problems with the conversion but illegal input
+ characters. */
+ assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT
+ || status == GCONV_INCOMPLETE_INPUT || status == GCONV_FULL_OUTPUT);
- written += step;
- }
+ if (status != GCONV_OK && status != GCONV_FULL_OUTPUT)
+ {
+ result = (size_t) -1;
+ __set_errno (EILSEQ);
}
- /* Store position of first unprocessed word. */
- *src = run;
-
- return written;
+ return result;
}
weak_alias (__wcsnrtombs, wcsnrtombs)
diff --git a/wcsmbs/wcsrtombs.c b/wcsmbs/wcsrtombs.c
index cc21a51..35bb58e 100644
--- a/wcsmbs/wcsrtombs.c
+++ b/wcsmbs/wcsrtombs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -18,26 +18,20 @@
Boston, MA 02111-1307, USA. */
#include <errno.h>
+#include <stdlib.h>
+#include <gconv.h>
#include <wchar.h>
+#include <wcsmbsload.h>
+
+#include <assert.h>
#ifndef EILSEQ
-#define EILSEQ EINVAL
+# define EILSEQ EINVAL
#endif
-static const wchar_t encoding_mask[] =
-{
- ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
-};
-
-static const unsigned char encoding_byte[] =
-{
- 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
-};
-
-/* We don't need the state really because we don't have shift states
- to maintain between calls to this function. */
-static mbstate_t internal;
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
size_t
__wcsrtombs (dst, src, len, ps)
@@ -46,89 +40,88 @@ __wcsrtombs (dst, src, len, ps)
size_t len;
mbstate_t *ps;
{
- size_t written = 0;
- const wchar_t *run = *src;
+ struct gconv_step_data data;
+ int status;
+ size_t result;
- if (ps == NULL)
- ps = &internal;
+ /* Tell where we want the result. */
+ data.is_last = 1;
+ data.statep = ps ?: &state;
- if (dst == NULL)
- /* The LEN parameter has to be ignored if we don't actually write
- anything. */
- len = ~0;
+ /* Make sure we use the correct function. */
+ update_conversion_ptrs ();
- while (written < len)
+ /* We have to handle DST == NULL specially. */
+ if (dst == NULL)
{
- wchar_t wc;
-
- /* Store position of first unprocessed word. */
- *src = run;
+ char buf[256]; /* Just an arbitrary value. */
+ size_t inbytes_in = __wcslen (*src) + 1;
+ size_t inbytes = inbytes_in;
+ const wchar_t *inbuf = *src;
+ size_t written;
- wc = *run++;
+ data.outbuf = buf;
+ data.outbufsize = sizeof (buf);
- if (wc < 0 || wc > 0x7fffffff)
+ do
{
- /* This is no correct ISO 10646 character. */
- __set_errno (EILSEQ);
- return (size_t) -1;
+ inbuf += (inbytes_in - inbytes) / sizeof (wchar_t);
+ inbytes_in = inbytes;
+ data.outbufavail = 0;
+ written = 0;
+
+ status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb,
+ &data,
+ (const char *) inbuf,
+ &inbytes, &written, 0);
+ result += written;
}
+ while (status == GCONV_FULL_OUTPUT);
- if (wc == L'\0')
+ if (status == GCONV_OK && dst[data.outbufavail - 1] == '\0')
+ /* Don't count the NUL character in. */
+ --result;
+ }
+ else
+ {
+ /* This code is based on the safe assumption that all internal
+ multi-byte encodings use the NUL byte only to mark the end
+ of the string. */
+ size_t inbytes_in = __wcsnlen (*src, len * MB_CUR_MAX) + 1;
+ size_t inbytes = inbytes_in;
+
+ data.outbuf = dst;
+ data.outbufavail = 0;
+ data.outbufsize = len;
+
+ status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb,
+ &data, (const char *) *src,
+ &inbytes, &result, 0);
+
+ /* We have to determine whether the last character converted
+ is the NUL character. */
+ if (status == GCONV_OK && dst[data.outbufavail - 1] == '\0')
{
- /* Found the end. */
- if (dst != NULL)
- *dst = '\0';
- ps->count = 0;
+ assert (data.outbufavail > 0);
+ assert (mbsinit (data.statep));
*src = NULL;
- return written;
- }
- else if (wc < 0x80)
- {
- /* It's an one byte sequence. */
- if (dst != NULL)
- *dst++ = (char) wc;
- ++written;
+ --result;
}
else
- {
- size_t step;
-
- for (step = 2; step < 6; ++step)
- if ((wc & encoding_mask[step - 2]) == 0)
- break;
-
- if (written + step >= len)
- /* Too long. */
- break;
-
- if (dst != NULL)
- {
- size_t cnt = step;
-
- dst[0] = encoding_byte[cnt - 2];
-
- --cnt;
- do
- {
- dst[cnt] = 0x80 | (wc & 0x3f);
- wc >>= 6;
- }
- while (--cnt > 0);
- dst[0] |= wc;
-
- dst += step;
- }
-
- written += step;
- }
+ *src += result;
}
- /* Store position of first unprocessed word. */
- *src = run;
+ /* There must not be any problems with the conversion but illegal input
+ characters. */
+ assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT
+ || status == GCONV_INCOMPLETE_INPUT || status == GCONV_FULL_OUTPUT);
- /* Signal that we finished correctly. */
- ps->count = 0;
+ if (status != GCONV_OK && status != GCONV_FULL_OUTPUT)
+ {
+ result = (size_t) -1;
+ __set_errno (EILSEQ);
+ }
- return written;
+ return result;
}
weak_alias (__wcsrtombs, wcsrtombs)
diff --git a/wcsmbs/wctob.c b/wcsmbs/wctob.c
index ce1063a..0461877 100644
--- a/wcsmbs/wctob.c
+++ b/wcsmbs/wctob.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -17,15 +17,48 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <gconv.h>
#include <stdio.h>
+#include <string.h>
#include <wchar.h>
+#include <wcsmbsload.h>
-/* We use UTF8 encoding for multibyte strings and therefore a valid
- one byte multibyte string only can have a value from 0 to 0x7f. */
+/* Convert the wide character C to its multibyte representation using
+   the conversion step for the current LC_CTYPE locale.  Returns the
+   resulting byte as an unsigned char value converted to int, or EOF if
+   the conversion fails or does not produce exactly one byte.  */
int
wctob (c)
wint_t c;
{
- return (c >= 0 && c <= 0x7f) ? c : EOF;
+  char buf[MB_LEN_MAX];
+  struct gconv_step_data data;
+  wchar_t inbuf[1];
+  size_t inbytes;
+  size_t converted;
+  int status;
+
+  /* Tell where we want the result.  */
+  data.outbuf = (char *) buf;
+  data.outbufavail = 0;
+  data.outbufsize = MB_LEN_MAX;
+  data.is_last = 1;
+  data.statep = &data.__state;
+
+  /* Make sure we start in the initial state.  */
+  memset (&data.__state, '\0', sizeof (mbstate_t));
+
+  /* Make sure we use the correct function.  */
+  update_conversion_ptrs ();
+
+  /* Create the input string.  */
+  inbuf[0] = c;
+  inbytes = sizeof (wchar_t);
+
+  status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb, &data,
+					     (const char *) inbuf, &inbytes,
+					     &converted, 0);
+
+  /* The conversion failed or the output is too long.  This function
+     returns int, so the failure value is EOF, not the wint_t WEOF.  */
+  if ((status != GCONV_OK && status != GCONV_FULL_OUTPUT)
+      || data.outbufavail != 1)
+    return EOF;
+
+  /* Go through unsigned char so that bytes >= 0x80 are not returned as
+     negative values where plain char is signed.  */
+  return (unsigned char) buf[0];
}