From db6af3ebf46a83b885455dc03a3c2c1c2c2dedec Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 28 Dec 2011 06:19:42 -0500 Subject: Add uchar.h support, part 1 c16 support for locales other than the C locale is still missing. --- wcsmbs/Makefile | 3 +- wcsmbs/Versions | 3 ++ wcsmbs/c16rtomb.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++ wcsmbs/mbrtoc16.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++ wcsmbs/mbrtowc.c | 7 ++- wcsmbs/uchar.h | 8 ++++ wcsmbs/wchar.h | 8 +++- wcsmbs/wcrtomb.c | 7 ++- wcsmbs/wcsmbsload.c | 90 +++++++++++++++++++++++++++++++++++--- wcsmbs/wcsmbsload.h | 7 ++- 10 files changed, 363 insertions(+), 13 deletions(-) create mode 100644 wcsmbs/c16rtomb.c create mode 100644 wcsmbs/mbrtoc16.c (limited to 'wcsmbs') diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile index 0bb1740..8c446e1 100644 --- a/wcsmbs/Makefile +++ b/wcsmbs/Makefile @@ -40,7 +40,8 @@ routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \ wcscasecmp wcsncase wcscasecmp_l wcsncase_l \ wcsmbsload mbsrtowcs_l \ isoc99_wscanf isoc99_vwscanf isoc99_fwscanf isoc99_vfwscanf \ - isoc99_swscanf isoc99_vswscanf + isoc99_swscanf isoc99_vswscanf \ + mbrtoc16 c16rtomb strop-tests := wcscmp wmemcmp wcslen wcschr wcsrchr wcscpy tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \ diff --git a/wcsmbs/Versions b/wcsmbs/Versions index b6dfa85..10bccc9 100644 --- a/wcsmbs/Versions +++ b/wcsmbs/Versions @@ -28,4 +28,7 @@ libc { __isoc99_wscanf; __isoc99_vwscanf; __isoc99_fwscanf; __isoc99_vfwscanf; __isoc99_swscanf; __isoc99_vswscanf; } + GLIBC_2.16 { + mbrtoc16; c16rtomb; mbrtoc32; c32rtomb; + } } diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c new file mode 100644 index 0000000..33e6b92 --- /dev/null +++ b/wcsmbs/c16rtomb.c @@ -0,0 +1,121 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifndef EILSEQ +# define EILSEQ EINVAL +#endif + +#if __STDC__ >= 20100L +# define u(c) U##c +#else +# define u(c) L##c +#endif + + +/* This is the private state used if PS is NULL. */ +static mbstate_t state; + +size_t +c16rtomb (char *s, char16_t c16, mbstate_t *ps) +{ + char buf[MB_CUR_MAX]; + struct __gconv_step_data data; + int status; + size_t result; + size_t dummy; + const struct gconv_fcts *fcts; + + /* Set information for this step. */ + data.__invocation_counter = 0; + data.__internal_use = 1; + data.__flags = __GCONV_IS_LAST; + data.__statep = ps ?: &state; + data.__trans = NULL; + + /* A first special case is if S is NULL. This means put PS in the + initial state. */ + if (s == NULL) + { + s = buf; + c16 = u('\0'); + } + + /* Tell where we want to have the result. */ + data.__outbuf = (unsigned char *) s; + data.__outbufend = (unsigned char *) s + MB_CUR_MAX; + + /* Get the conversion functions. 
*/ + fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE)); + __gconv_fct fct = fcts->fromc16->__fct; +#ifdef PTR_DEMANGLE + if (fcts->tomb->__shlib_handle != NULL) + PTR_DEMANGLE (fct); +#endif + + /* If C16 is the NUL character we write into the output buffer the byte + sequence necessary for PS to get into the initial state, followed + by a NUL byte. */ + if (c16 == L'\0') + { + status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL, + NULL, &dummy, 1, 1)); + + if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT) + *data.__outbuf++ = '\0'; + } + else + { + /* Do a normal conversion. */ + const unsigned char *inbuf = (const unsigned char *) &c16; + + status = DL_CALL_FCT (fct, + (fcts->fromc16, &data, &inbuf, + inbuf + sizeof (char16_t), NULL, &dummy, 0, 1)); + } + + /* There must not be any problems with the conversion but illegal input + characters. The output buffer must be large enough, otherwise the + definition of MB_CUR_MAX is not correct. All the other possible + errors also must not happen. */ + assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT + || status == __GCONV_ILLEGAL_INPUT + || status == __GCONV_INCOMPLETE_INPUT + || status == __GCONV_FULL_OUTPUT); + + if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT + || status == __GCONV_FULL_OUTPUT) + result = data.__outbuf - (unsigned char *) s; + else + { + result = (size_t) -1; + __set_errno (EILSEQ); + } + + return result; +} diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c new file mode 100644 index 0000000..3a3a45c --- /dev/null +++ b/wcsmbs/mbrtoc16.c @@ -0,0 +1,122 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include +#include +#include + +#include + +#ifndef EILSEQ +# define EILSEQ EINVAL +#endif + +#if __STDC__ >= 20100L +# define U(c) U##c +#else +# define U(c) L##c +#endif + + +/* This is the private state used if PS is NULL. */ +static mbstate_t state; + +size_t +mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) +{ + char16_t buf[1]; + struct __gconv_step_data data; + int status; + size_t result; + size_t dummy; + const unsigned char *inbuf, *endbuf; + unsigned char *outbuf = (unsigned char *) (pc16 ?: buf); + const struct gconv_fcts *fcts; + + /* Set information for this step. */ + data.__invocation_counter = 0; + data.__internal_use = 1; + data.__flags = __GCONV_IS_LAST; + data.__statep = ps ?: &state; + data.__trans = NULL; + + /* A first special case is if S is NULL. This means put PS in the + initial state. */ + if (s == NULL) + { + outbuf = (unsigned char *) buf; + s = ""; + n = 1; + } + + /* Tell where we want the result. */ + data.__outbuf = outbuf; + data.__outbufend = outbuf + sizeof (char16_t); + + /* Get the conversion functions. */ + fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE)); + + /* Do a normal conversion. 
*/ + inbuf = (const unsigned char *) s; + endbuf = inbuf + n; + if (__builtin_expect (endbuf < inbuf, 0)) + endbuf = (const unsigned char *) ~(uintptr_t) 0; + __gconv_fct fct = fcts->toc16->__fct; +#ifdef PTR_DEMANGLE + if (fcts->toc16->__shlib_handle != NULL) + PTR_DEMANGLE (fct); +#endif + status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, + NULL, &dummy, 0, 1)); + + /* There must not be any problems with the conversion but illegal input + characters. The output buffer must be large enough, otherwise the + definition of MB_CUR_MAX is not correct. All the other possible + errors also must not happen. */ + assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT + || status == __GCONV_ILLEGAL_INPUT + || status == __GCONV_INCOMPLETE_INPUT + || status == __GCONV_FULL_OUTPUT); + + if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT + || status == __GCONV_FULL_OUTPUT) + { + if (data.__outbuf != (unsigned char *) outbuf + && *(char16_t *) outbuf == U('\0')) + { + /* The converted character is the NUL character. */ + assert (__mbsinit (data.__statep)); + result = 0; + } + else + result = inbuf - (const unsigned char *) s; + } + else if (status == __GCONV_INCOMPLETE_INPUT) + result = (size_t) -2; + else + { + result = (size_t) -1; + __set_errno (EILSEQ); + } + + return result; +} diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c index b534571..03b8348 100644 --- a/wcsmbs/mbrtowc.c +++ b/wcsmbs/mbrtowc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005 +/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. 
@@ -117,3 +117,8 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) libc_hidden_def (__mbrtowc) weak_alias (__mbrtowc, mbrtowc) libc_hidden_weak (mbrtowc) + +/* There should be no difference between the UTF-32 handling required + by mbrtoc32 and the wchar_t handling which has long since been + implemented in mbrtowc. */ +weak_alias (__mbrtowc, mbrtoc32) diff --git a/wcsmbs/uchar.h b/wcsmbs/uchar.h index 44637c3..bb5f3ba 100644 --- a/wcsmbs/uchar.h +++ b/wcsmbs/uchar.h @@ -31,6 +31,14 @@ #define __need_mbstate_t #include +#ifndef __mbstate_t_defined +__BEGIN_NAMESPACE_C99 +/* Public type. */ +typedef __mbstate_t mbstate_t; +__END_NAMESPACE_C99 +# define __mbstate_t_defined 1 +#endif + #ifdef __GNUC__ /* Define the 16-bit and 32-bit character types. Use the information diff --git a/wcsmbs/wchar.h b/wcsmbs/wchar.h index 2b35f51..ccaaed8 100644 --- a/wcsmbs/wchar.h +++ b/wcsmbs/wchar.h @@ -77,8 +77,8 @@ __END_NAMESPACE_STD # endif #endif -#if (defined _WCHAR_H || defined __need_mbstate_t) && !defined __mbstate_t_defined -# define __mbstate_t_defined 1 +#if (defined _WCHAR_H || defined __need_mbstate_t) && !defined ____mbstate_t_defined +# define ____mbstate_t_defined 1 /* Conversion state information. */ typedef struct { @@ -101,10 +101,14 @@ typedef struct defined. */ #ifdef _WCHAR_H +# ifndef __mbstate_t_defined __BEGIN_NAMESPACE_C99 /* Public type. */ typedef __mbstate_t mbstate_t; __END_NAMESPACE_C99 +# define __mbstate_t_defined 1 +# endif + #ifdef __USE_GNU __USING_NAMESPACE_C99(mbstate_t) #endif diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c index aa51b68..547b05a 100644 --- a/wcsmbs/wcrtomb.c +++ b/wcsmbs/wcrtomb.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996,1997,1998,2000,2002,2005 Free Software Foundation, Inc. +/* Copyright (C) 1996-1998,2000,2002,2005,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. 
@@ -115,3 +115,8 @@ __wcrtomb (char *s, wchar_t wc, mbstate_t *ps) } weak_alias (__wcrtomb, wcrtomb) libc_hidden_weak (wcrtomb) + +/* There should be no difference between the UTF-32 handling required + by c32rtomb and the wchar_t handling which has long since been + implemented in wcrtomb. */ +weak_alias (__wcrtomb, c32rtomb) diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c index 328f164..212a6c8 100644 --- a/wcsmbs/wcsmbsload.c +++ b/wcsmbs/wcsmbsload.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1998-2002,2004,2005,2008,2010 Free Software Foundation, Inc. +/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1998. @@ -67,6 +67,44 @@ static const struct __gconv_step to_mb = .__data = NULL }; +static const struct __gconv_step to_c16 = +{ + .__shlib_handle = NULL, + .__modname = NULL, + .__counter = INT_MAX, + .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT", + .__to_name = (char *) "UTF-16//", + .__fct = __gconv_transform_ascii_utf16, + .__btowc_fct = NULL, + .__init_fct = NULL, + .__end_fct = NULL, + .__min_needed_from = 1, + .__max_needed_from = 1, + .__min_needed_to = 4, + .__max_needed_to = 4, + .__stateful = 0, + .__data = NULL +}; + +static const struct __gconv_step from_c16 = +{ + .__shlib_handle = NULL, + .__modname = NULL, + .__counter = INT_MAX, + .__from_name = (char *) "UTF-16//", + .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT", + .__fct = __gconv_transform_utf16_ascii, + .__btowc_fct = NULL, + .__init_fct = NULL, + .__end_fct = NULL, + .__min_needed_from = 4, + .__max_needed_from = 4, + .__min_needed_to = 1, + .__max_needed_to = 1, + .__stateful = 0, + .__data = NULL +}; + /* For the default locale we only have to handle ANSI_X3.4-1968. 
*/ const struct gconv_fcts __wcsmbs_gconv_fcts_c = @@ -74,7 +112,12 @@ const struct gconv_fcts __wcsmbs_gconv_fcts_c = .towc = (struct __gconv_step *) &to_wc, .towc_nsteps = 1, .tomb = (struct __gconv_step *) &to_mb, - .tomb_nsteps = 1 + .tomb_nsteps = 1, + + .toc16 = (struct __gconv_step *) &to_c16, + .toc16_nsteps = 1, + .fromc16 = (struct __gconv_step *) &from_c16, + .fromc16_nsteps = 1, }; @@ -191,6 +234,12 @@ __wcsmbs_load_conv (struct __locale_data *new_category) &new_fcts->tomb_nsteps) : NULL); + // XXX + new_fcts->toc16 = (struct __gconv_step *) &to_c16; + new_fcts->toc16_nsteps = 1; + new_fcts->fromc16 = (struct __gconv_step *) &from_c16; + new_fcts->fromc16_nsteps = 1; + /* If any of the conversion functions is not available we don't use any since this would mean we cannot convert back and forth.*/ @@ -242,14 +291,36 @@ internal_function __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name) { copy->towc = __wcsmbs_getfct ("INTERNAL", name, &copy->towc_nsteps); - if (copy->towc != NULL) + if (copy->towc == NULL) + return 1; + + copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps); + if (copy->tomb == NULL) + goto out_mb; + +#if 0 + copy->fromc16 = __wcsmbs_getfct (name, "UTF-16//", &copy->fromc16_nsteps); + if (copy->fromc16 == NULL) + goto out_fromc16; + + copy->toc16 = __wcsmbs_getfct ("UTF-16//", name, &copy->toc16_nsteps); + if (copy->toc16 == NULL) +#else + if (0) +#endif { - copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps); - if (copy->tomb == NULL) - __gconv_close_transform (copy->towc, copy->towc_nsteps); +#if 0 + __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps); + out_fromc16: + __gconv_close_transform (copy->tomb, copy->tomb_nsteps); +#endif + out_mb: + __gconv_close_transform (copy->towc, copy->towc_nsteps); + out_wc: + return 1; } - return copy->towc == NULL || copy->tomb == NULL ? 
1 : 0; + return 0; } void internal_function @@ -264,6 +335,11 @@ _nl_cleanup_ctype (struct __locale_data *locale) /* Free the old conversions. */ __gconv_close_transform (data->tomb, data->tomb_nsteps); __gconv_close_transform (data->towc, data->towc_nsteps); +#if 0 + // XXX + __gconv_close_transform (data->fromc16, data->fromc16_nsteps); + __gconv_close_transform (data->toc16, data->toc16c_nsteps); +#endif free ((char *) data); } } diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h index e2b1bfa..064c41c 100644 --- a/wcsmbs/wcsmbsload.h +++ b/wcsmbs/wcsmbsload.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1998-2002, 2010 Free Software Foundation, Inc. +/* Copyright (C) 1998-2002, 2010, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1998. @@ -32,6 +32,11 @@ struct gconv_fcts size_t towc_nsteps; struct __gconv_step *tomb; size_t tomb_nsteps; + + struct __gconv_step *toc16; + size_t toc16_nsteps; + struct __gconv_step *fromc16; + size_t fromc16_nsteps; }; /* Set of currently active conversion functions. */ -- cgit v1.1