aboutsummaryrefslogtreecommitdiff
path: root/wcsmbs
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@gmail.com> 2012-01-08 07:19:21 -0500
committer: Ulrich Drepper <drepper@gmail.com> 2012-01-08 07:19:21 -0500
commit: d3ed722566f42d3f614b1221a8e4f19092976531 (patch)
tree: 4a63e059ef599167cf407311188551fe72221d8d /wcsmbs
parent: a0da5fe1e49b819b4d90b77915e21cddd397d064 (diff)
download: glibc-d3ed722566f42d3f614b1221a8e4f19092976531.zip
glibc-d3ed722566f42d3f614b1221a8e4f19092976531.tar.gz
glibc-d3ed722566f42d3f614b1221a8e4f19092976531.tar.bz2
Simplify char16_t implementation
Diffstat (limited to 'wcsmbs')
-rw-r--r-- wcsmbs/c16rtomb.c 97
-rw-r--r-- wcsmbs/mbrtoc16.c 75
-rw-r--r-- wcsmbs/mbrtowc.c 9
-rw-r--r-- wcsmbs/wcsmbsload.c 85
-rw-r--r-- wcsmbs/wcsmbsload.h 5
5 files changed, 43 insertions, 228 deletions
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c
index 3fed0b5..5374c75 100644
--- a/wcsmbs/c16rtomb.c
+++ b/wcsmbs/c16rtomb.c
@@ -17,25 +17,8 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#include <assert.h>
-#include <dlfcn.h>
-#include <errno.h>
-#include <gconv.h>
-#include <stdlib.h>
#include <uchar.h>
-#include <wcsmbsload.h>
-
-#include <sysdep.h>
-
-#ifndef EILSEQ
-# define EILSEQ EINVAL
-#endif
-
-#if __STDC__ >= 201000L
-# define u(c) U##c
-#else
-# define u(c) L##c
-#endif
+#include <wchar.h>
/* This is the private state used if PS is NULL. */
@@ -44,85 +27,7 @@ static mbstate_t state;
size_t
c16rtomb (char *s, char16_t c16, mbstate_t *ps)
{
-#if 1
// XXX The ISO C 11 spec I have does not say anything about handling
// XXX surrogates in this interface.
return wcrtomb (s, c16, ps ?: &state);
-#else
- char buf[MB_LEN_MAX];
- struct __gconv_step_data data;
- int status;
- size_t result;
- size_t dummy;
- const struct gconv_fcts *fcts;
-
- /* Set information for this step. */
- data.__invocation_counter = 0;
- data.__internal_use = 1;
- data.__flags = __GCONV_IS_LAST;
- data.__statep = ps ?: &state;
- data.__trans = NULL;
-
- /* A first special case is if S is NULL. This means put PS in the
- initial state. */
- if (s == NULL)
- {
- s = buf;
- c16 = u('\0');
- }
-
- /* Tell where we want to have the result. */
- data.__outbuf = (unsigned char *) s;
- data.__outbufend = (unsigned char *) s + MB_CUR_MAX;
-
- /* Get the conversion functions. */
- fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
- __gconv_fct fct = fcts->fromc16->__fct;
-#ifdef PTR_DEMANGLE
- if (fcts->tomb->__shlib_handle != NULL)
- PTR_DEMANGLE (fct);
-#endif
-
- /* If C16 is the NUL character we write into the output buffer
- the byte sequence necessary for PS to get into the initial
- state, followed by a NUL byte. */
- if (c16 == L'\0')
- {
- status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL,
- NULL, &dummy, 1, 1));
-
- if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
- *data.__outbuf++ = '\0';
- }
- else
- {
- /* Do a normal conversion. */
- const unsigned char *inbuf = (const unsigned char *) &c16;
-
- status = DL_CALL_FCT (fct,
- (fcts->fromc16, &data, &inbuf,
- inbuf + sizeof (char16_t), NULL, &dummy,
- 0, 1));
- }
-
- /* There must not be any problems with the conversion but illegal input
- characters. The output buffer must be large enough, otherwise the
- definition of MB_CUR_MAX is not correct. All the other possible
- errors also must not happen. */
- assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
- || status == __GCONV_ILLEGAL_INPUT
- || status == __GCONV_INCOMPLETE_INPUT
- || status == __GCONV_FULL_OUTPUT);
-
- if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
- || status == __GCONV_FULL_OUTPUT)
- result = data.__outbuf - (unsigned char *) s;
- else
- {
- result = (size_t) -1;
- __set_errno (EILSEQ);
- }
-
- return result;
-#endif
}
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c
index df970fb..f5ed2b4 100644
--- a/wcsmbs/mbrtoc16.c
+++ b/wcsmbs/mbrtoc16.c
@@ -30,12 +30,6 @@
# define EILSEQ EINVAL
#endif
-#if __STDC__ >= 201000L
-# define U(c) U##c
-#else
-# define U(c) L##c
-#endif
-
/* This is the private state used if PS is NULL. */
static mbstate_t state;
@@ -46,6 +40,11 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
if (ps == NULL)
ps = &state;
+ /* The standard text does not say that S being NULL means the state
+ is reset even if the second half of a surrogate pair still has to
+ be returned. In fact, the error code description indicates
+ otherwise. Therefore always first try to return a second
+ half. */
if (ps->__count & 0x80000000)
{
/* We have to return the second word for a surrogate. */
@@ -55,13 +54,13 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
return (size_t) -3;
}
- char16_t buf[2];
+ wchar_t wc;
struct __gconv_step_data data;
int status;
size_t result;
size_t dummy;
const unsigned char *inbuf, *endbuf;
- unsigned char *outbuf = (unsigned char *) buf;
+ unsigned char *outbuf = (unsigned char *) &wc;
const struct gconv_fcts *fcts;
/* Set information for this step. */
@@ -75,14 +74,14 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
initial state. */
if (s == NULL)
{
- outbuf = (unsigned char *) buf;
+ pc16 = NULL;
s = "";
n = 1;
}
/* Tell where we want the result. */
data.__outbuf = outbuf;
- data.__outbufend = outbuf + sizeof (char16_t);
+ data.__outbufend = outbuf + sizeof (wchar_t);
/* Get the conversion functions. */
fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
@@ -91,28 +90,20 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
inbuf = (const unsigned char *) s;
endbuf = inbuf + n;
if (__builtin_expect (endbuf < inbuf, 0))
- endbuf = (const unsigned char *) ~(uintptr_t) 0;
- __gconv_fct fct = fcts->toc16->__fct;
+ {
+ endbuf = (const unsigned char *) ~(uintptr_t) 0;
+ if (endbuf == inbuf)
+ goto ilseq;
+ }
+ __gconv_fct fct = fcts->towc->__fct;
#ifdef PTR_DEMANGLE
- if (fcts->toc16->__shlib_handle != NULL)
+ if (fcts->towc->__shlib_handle != NULL)
PTR_DEMANGLE (fct);
#endif
- /* We first have to check whether the character can be represented
- without a surrogate. If we immediately pass in a buffer large
- enough to hold two char16_t values and the first character does
- not require a surrogate the routine will try to convert more
- input if N is larger then needed for the first character. */
- status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
+ status = DL_CALL_FCT (fct, (fcts->towc, &data, &inbuf, endbuf,
NULL, &dummy, 0, 1));
- if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf)
- {
- data.__outbufend = outbuf + 2 * sizeof (char16_t);
- status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
- NULL, &dummy, 0, 1));
- }
-
/* There must not be any problems with the conversion but illegal input
characters. The output buffer must be large enough, otherwise the
definition of MB_CUR_MAX is not correct. All the other possible
@@ -125,33 +116,35 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
|| status == __GCONV_FULL_OUTPUT)
{
- if (pc16 != NULL)
- *pc16 = buf[0];
+ result = inbuf - (const unsigned char *) s;
- if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0'))
+ if (wc < 0x10000)
{
- /* The converted character is the NUL character. */
- assert (__mbsinit (data.__statep));
- result = 0;
+ if (pc16 != NULL)
+ *pc16 = wc;
+
+ if (data.__outbuf != outbuf && wc == L'\0')
+ {
+ /* The converted character is the NUL character. */
+ assert (__mbsinit (data.__statep));
+ result = 0;
+ }
}
else
{
- result = inbuf - (const unsigned char *) s;
+ /* WC is 0x10000 or above and needs a surrogate pair. */
+ if (pc16 != NULL)
+ *pc16 = 0xd7c0 + (wc >> 10);
- if (data.__outbuf != outbuf + 2)
- {
- /* This is a surrogate. */
- assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff);
- assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff);
- ps->__count |= 0x80000000;
- ps->__value.__wch = buf[1];
- }
+ ps->__count |= 0x80000000;
+ ps->__value.__wch = 0xdc00 + (wc & 0x3ff);
}
}
else if (status == __GCONV_INCOMPLETE_INPUT)
result = (size_t) -2;
else
{
+ ilseq:
result = (size_t) -1;
__set_errno (EILSEQ);
}
diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c
index 03b8348..0c99b74 100644
--- a/wcsmbs/mbrtowc.c
+++ b/wcsmbs/mbrtowc.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011
+/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011, 2012
Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
@@ -73,7 +73,11 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
inbuf = (const unsigned char *) s;
endbuf = inbuf + n;
if (__builtin_expect (endbuf < inbuf, 0))
- endbuf = (const unsigned char *) ~(uintptr_t) 0;
+ {
+ endbuf = (const unsigned char *) ~(uintptr_t) 0;
+ if (endbuf == inbuf)
+ goto ilseq;
+ }
__gconv_fct fct = fcts->towc->__fct;
#ifdef PTR_DEMANGLE
if (fcts->towc->__shlib_handle != NULL)
@@ -108,6 +112,7 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
result = (size_t) -2;
else
{
+ ilseq:
result = (size_t) -1;
__set_errno (EILSEQ);
}
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
index 9ce26f1..27ea442 100644
--- a/wcsmbs/wcsmbsload.c
+++ b/wcsmbs/wcsmbsload.c
@@ -68,44 +68,6 @@ static const struct __gconv_step to_mb =
.__data = NULL
};
-static const struct __gconv_step to_c16 =
-{
- .__shlib_handle = NULL,
- .__modname = NULL,
- .__counter = INT_MAX,
- .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
- .__to_name = (char *) "UTF-16//",
- .__fct = __gconv_transform_ascii_char16,
- .__btowc_fct = NULL,
- .__init_fct = NULL,
- .__end_fct = NULL,
- .__min_needed_from = 1,
- .__max_needed_from = 1,
- .__min_needed_to = 4,
- .__max_needed_to = 4,
- .__stateful = 0,
- .__data = NULL
-};
-
-static const struct __gconv_step from_c16 =
-{
- .__shlib_handle = NULL,
- .__modname = NULL,
- .__counter = INT_MAX,
- .__from_name = (char *) "UTF-16//",
- .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
- .__fct = __gconv_transform_char16_ascii,
- .__btowc_fct = NULL,
- .__init_fct = NULL,
- .__end_fct = NULL,
- .__min_needed_from = 4,
- .__max_needed_from = 4,
- .__min_needed_to = 1,
- .__max_needed_to = 1,
- .__stateful = 0,
- .__data = NULL
-};
-
/* For the default locale we only have to handle ANSI_X3.4-1968. */
const struct gconv_fcts __wcsmbs_gconv_fcts_c =
@@ -114,11 +76,6 @@ const struct gconv_fcts __wcsmbs_gconv_fcts_c =
.towc_nsteps = 1,
.tomb = (struct __gconv_step *) &to_mb,
.tomb_nsteps = 1,
-
- .toc16 = (struct __gconv_step *) &to_c16,
- .toc16_nsteps = 1,
- .fromc16 = (struct __gconv_step *) &from_c16,
- .fromc16_nsteps = 1,
};
@@ -234,24 +191,9 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL",
&new_fcts->tomb_nsteps);
- if (new_fcts->tomb != NULL)
- {
- new_fcts->toc16 = __wcsmbs_getfct ("CHAR16", complete_name,
- &new_fcts->toc16_nsteps);
-
- if (new_fcts->toc16 != NULL)
- new_fcts->fromc16 = __wcsmbs_getfct (complete_name, "CHAR16",
- &new_fcts->fromc16_nsteps);
- else
- {
- __gconv_close_transform (new_fcts->toc16, new_fcts->toc16_nsteps);
- new_fcts->toc16 = NULL;
- }
- }
-
/* If any of the conversion functions is not available we don't
use any since this would mean we cannot convert back and
- forth.*/
+ forth. NB: NEW_FCTS was allocated with calloc. */
if (new_fcts->tomb == NULL)
{
if (new_fcts->towc != NULL)
@@ -264,12 +206,6 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
}
else
{
- // XXX At least for now we live with the CHAR16 not being available.
- if (new_fcts->toc16 == NULL)
- new_fcts->toc16 = __wcsmbs_gconv_fcts_c.toc16;
- if (new_fcts->fromc16 == NULL)
- new_fcts->fromc16 = __wcsmbs_gconv_fcts_c.fromc16;
-
new_category->private.ctype = new_fcts;
new_category->private.cleanup = &_nl_cleanup_ctype;
}
@@ -297,10 +233,6 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy)
++copy->towc->__counter;
if (copy->tomb->__shlib_handle != NULL)
++copy->tomb->__counter;
- if (copy->toc16->__shlib_handle != NULL)
- ++copy->toc16->__counter;
- if (copy->fromc16->__shlib_handle != NULL)
- ++copy->fromc16->__counter;
}
@@ -320,19 +252,6 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
return 1;
}
- copy->fromc16 = __wcsmbs_getfct (name, "CHAR16", &copy->fromc16_nsteps);
- if (copy->fromc16 == NULL)
- copy->toc16 = NULL;
- else
- {
- copy->toc16 = __wcsmbs_getfct ("CHAR16", name, &copy->toc16_nsteps);
- if (copy->toc16 == NULL)
- {
- __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
- copy->fromc16 = NULL;
- }
- }
-
return 0;
}
@@ -348,8 +267,6 @@ _nl_cleanup_ctype (struct __locale_data *locale)
/* Free the old conversions. */
__gconv_close_transform (data->tomb, data->tomb_nsteps);
__gconv_close_transform (data->towc, data->towc_nsteps);
- __gconv_close_transform (data->fromc16, data->fromc16_nsteps);
- __gconv_close_transform (data->toc16, data->toc16_nsteps);
free ((char *) data);
}
}
diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h
index 064c41c..98f53bc 100644
--- a/wcsmbs/wcsmbsload.h
+++ b/wcsmbs/wcsmbsload.h
@@ -32,11 +32,6 @@ struct gconv_fcts
size_t towc_nsteps;
struct __gconv_step *tomb;
size_t tomb_nsteps;
-
- struct __gconv_step *toc16;
- size_t toc16_nsteps;
- struct __gconv_step *fromc16;
- size_t fromc16_nsteps;
};
/* Set of currently active conversion functions. */