aboutsummaryrefslogtreecommitdiff
path: root/wcsmbs/mbrtowc.c
diff options
context:
space:
mode:
Diffstat (limited to 'wcsmbs/mbrtowc.c')
-rw-r--r-- wcsmbs/mbrtowc.c 144
1 files changed, 54 insertions, 90 deletions
diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c
index cf0bbd6..99bc5a4 100644
--- a/wcsmbs/mbrtowc.c
+++ b/wcsmbs/mbrtowc.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -18,113 +18,77 @@
Boston, MA 02111-1307, USA. */
#include <errno.h>
+#include <gconv.h>
#include <wchar.h>
+#include <wcsmbsload.h>
+
+#include <assert.h>
#ifndef EILSEQ
-#define EILSEQ EINVAL
+# define EILSEQ EINVAL
#endif
-static mbstate_t internal;
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
size_t
__mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
- size_t used = 0;
-
- if (ps == NULL)
- ps = &internal;
-
+ wchar_t buf[1];
+ struct gconv_step_data data;
+ size_t inbytes;
+ int status;
+ size_t result;
+
+ /* Tell where we want the result. */
+ data.outbuf = (char *) (pwc ?: buf);
+ data.outbufavail = 0;
+ data.outbufsize = sizeof (wchar_t);
+ data.is_last = 1;
+ data.statep = ps ?: &state;
+
+ /* First special case: if S is NULL, convert the empty string "" with
+ N == 1 and discard the output; this puts PS in the initial state. */
if (s == NULL)
{
- /* See first paragraph of description in 7.16.6.3.2. */
- ps->count = 0;
- return 0;
+ data.outbuf = (char *) buf;
+ s = "";
+ n = 1;
}
- if (n > 0)
- {
- if (ps->count == 0)
- {
- unsigned char byte = (unsigned char) *s++;
- ++used;
-
- /* We must look for a possible first byte of a UTF8 sequence. */
- if (byte < 0x80)
- {
- /* One byte sequence. */
- if (pwc != NULL)
- *pwc = (wchar_t) byte;
- return byte ? used : 0;
- }
-
- if ((byte & 0xc0) == 0x80 || (byte & 0xfe) == 0xfe)
- {
- /* Oh, oh. An encoding error. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
-
- if ((byte & 0xe0) == 0xc0)
- {
- /* We expect two bytes. */
- ps->count = 1;
- ps->value = byte & 0x1f;
- }
- else if ((byte & 0xf0) == 0xe0)
- {
- /* We expect three bytes. */
- ps->count = 2;
- ps->value = byte & 0x0f;
- }
- else if ((byte & 0xf8) == 0xf0)
- {
- /* We expect four bytes. */
- ps->count = 3;
- ps->value = byte & 0x07;
- }
- else if ((byte & 0xfc) == 0xf8)
- {
- /* We expect five bytes. */
- ps->count = 4;
- ps->value = byte & 0x03;
- }
- else
- {
- /* We expect six bytes. */
- ps->count = 5;
- ps->value = byte & 0x01;
- }
- }
+ /* Make sure we use the correct function. */
+ update_conversion_ptrs ();
- /* We know we have to handle a multibyte character and there are
- some more bytes to read. */
- while (used < n)
+ /* Do a normal conversion. */
+ inbytes = n;
+ status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc,
+ &data, s, &inbytes, NULL, 0);
+
+ /* The only problems the conversion may report are illegal or incomplete
+ input. The output buffer must be large enough, otherwise the
+ definition of MB_CUR_MAX is not correct. No other error status
+ may occur here. */
+ assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT
+ || status == GCONV_INCOMPLETE_INPUT);
+
+ if (status == GCONV_OK)
+ {
+ if (*(wchar_t *)data.outbuf == L'\0')
{
- /* The second to sixths byte must be of the form 10xxxxxx. */
- unsigned char byte = (unsigned char) *s++;
- ++used;
-
- if ((byte & 0xc0) != 0x80)
- {
- /* Oh, oh. An encoding error. */
- __set_errno (EILSEQ);
- return (size_t) -1;
- }
-
- ps->value <<= 6;
- ps->value |= byte & 0x3f;
-
- if (--ps->count == 0)
- {
- /* The character is finished. */
- if (pwc != NULL)
- *pwc = (wchar_t) ps->value;
- return ps->value ? used : 0;
- }
+ /* The converted character is the NUL character. */
+ assert (mbsinit (data.statep));
+ result = 0;
}
+ else
+ result = n - inbytes;
+ }
+ else
+ {
+ result = status == GCONV_INCOMPLETE_INPUT ? (size_t) -2 : (size_t) -1;
+ __set_errno (EILSEQ);
}
- return (size_t) -2;
+ return result;
}
weak_alias (__mbrtowc, mbrtowc)