diff options
author | Ulrich Drepper <drepper@gmail.com> | 2012-01-07 10:52:53 -0500 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2012-01-07 10:52:53 -0500 |
commit | 9954432e309c8fddaec2fe53e601702a5c981624 (patch) | |
tree | 3eb7513694e25391b3393afbb847dbd85ebf097a /wcsmbs/mbrtoc16.c | |
parent | c3a87236702cb73be1dada3438bbd3c3934e83f8 (diff) | |
download | glibc-9954432e309c8fddaec2fe53e601702a5c981624.zip glibc-9954432e309c8fddaec2fe53e601702a5c981624.tar.gz glibc-9954432e309c8fddaec2fe53e601702a5c981624.tar.bz2 |
More char16_t and char32_t support
It works now for UTF-8 locales
Diffstat (limited to 'wcsmbs/mbrtoc16.c')
-rw-r--r-- | wcsmbs/mbrtoc16.c | 52 |
1 files changed, 45 insertions, 7 deletions
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c index 7b5822d..df970fb 100644 --- a/wcsmbs/mbrtoc16.c +++ b/wcsmbs/mbrtoc16.c @@ -1,6 +1,6 @@ /* Copyright (C) 2011, 2012 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.org>, 2011. + Contributed by Ulrich Drepper <drepper@gmail.com>, 2011. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -43,20 +43,32 @@ static mbstate_t state; size_t mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) { - char16_t buf[1]; + if (ps == NULL) + ps = &state; + + if (ps->__count & 0x80000000) + { + /* We have to return the second word for a surrogate. */ + ps->__count &= 0x7fffffff; + *pc16 = ps->__value.__wch; + ps->__value.__wch = L'\0'; + return (size_t) -3; + } + + char16_t buf[2]; struct __gconv_step_data data; int status; size_t result; size_t dummy; const unsigned char *inbuf, *endbuf; - unsigned char *outbuf = (unsigned char *) (pc16 ?: buf); + unsigned char *outbuf = (unsigned char *) buf; const struct gconv_fcts *fcts; /* Set information for this step. */ data.__invocation_counter = 0; data.__internal_use = 1; data.__flags = __GCONV_IS_LAST; - data.__statep = ps ?: &state; + data.__statep = ps; data.__trans = NULL; /* A first special case is if S is NULL. This means put PS in the @@ -85,9 +97,22 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (fcts->toc16->__shlib_handle != NULL) PTR_DEMANGLE (fct); #endif + + /* We first have to check whether the character can be represented + without a surrogate. If we immediately pass in a buffer large + enough to hold two char16_t values and the first character does + not require a surrogate the routine will try to convert more + input if N is larger than needed for the first character. 
*/ status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, NULL, &dummy, 0, 1)); + if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf) + { + data.__outbufend = outbuf + 2 * sizeof (char16_t); + status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, + NULL, &dummy, 0, 1)); + } + /* There must not be any problems with the conversion but illegal input characters. The output buffer must be large enough, otherwise the definition of MB_CUR_MAX is not correct. All the other possible @@ -100,15 +125,28 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT || status == __GCONV_FULL_OUTPUT) { - if (data.__outbuf != (unsigned char *) outbuf - && *(char16_t *) outbuf == U('\0')) + if (pc16 != NULL) + *pc16 = buf[0]; + + if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0')) { /* The converted character is the NUL character. */ assert (__mbsinit (data.__statep)); result = 0; } else - result = inbuf - (const unsigned char *) s; + { + result = inbuf - (const unsigned char *) s; + + if (data.__outbuf != outbuf + 2) + { + /* This is a surrogate. */ + assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff); + assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff); + ps->__count |= 0x80000000; + ps->__value.__wch = buf[1]; + } + } } else if (status == __GCONV_INCOMPLETE_INPUT) result = (size_t) -2; |