diff options
author | Ulrich Drepper <drepper@gmail.com> | 2012-01-07 10:52:53 -0500 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2012-01-07 10:52:53 -0500 |
commit | 9954432e309c8fddaec2fe53e601702a5c981624 (patch) | |
tree | 3eb7513694e25391b3393afbb847dbd85ebf097a /wcsmbs | |
parent | c3a87236702cb73be1dada3438bbd3c3934e83f8 (diff) | |
download | glibc-9954432e309c8fddaec2fe53e601702a5c981624.zip glibc-9954432e309c8fddaec2fe53e601702a5c981624.tar.gz glibc-9954432e309c8fddaec2fe53e601702a5c981624.tar.bz2 |
More char16_t and char32_t support
It works now for UTF-8 locales
Diffstat (limited to 'wcsmbs')
-rw-r--r-- | wcsmbs/Makefile | 3 | ||||
-rw-r--r-- | wcsmbs/c16rtomb.c | 19 | ||||
-rw-r--r-- | wcsmbs/mbrtoc16.c | 52 | ||||
-rw-r--r-- | wcsmbs/tst-c16c32-1.c | 131 | ||||
-rw-r--r-- | wcsmbs/wcrtomb.c | 5 | ||||
-rw-r--r-- | wcsmbs/wcsmbsload.c | 84 |
6 files changed, 241 insertions, 53 deletions
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile index 8c446e1..010e0c8 100644 --- a/wcsmbs/Makefile +++ b/wcsmbs/Makefile @@ -1,4 +1,4 @@ -# Copyright (C) 1995-2000,2002,2003,2004,2005,2006,2007,2011 +# Copyright (C) 1995-2000,2002,2003,2004,2005,2006,2007,2011,2012 # Free Software Foundation, Inc. # This file is part of the GNU C Library. @@ -46,6 +46,7 @@ routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \ strop-tests := wcscmp wmemcmp wcslen wcschr wcsrchr wcscpy tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \ tst-wcrtomb tst-wcpncpy tst-mbsrtowcs tst-wchar-h tst-mbrtowc2 \ + tst-c16c32-1 \ wcsatcliff $(addprefix test-,$(strop-tests)) include ../Rules diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c index c75ca3b..3fed0b5 100644 --- a/wcsmbs/c16rtomb.c +++ b/wcsmbs/c16rtomb.c @@ -1,6 +1,6 @@ /* Copyright (C) 2011, 2012 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 2011. + Contributed by Ulrich Drepper <drepper@gmail.com>, 2011. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -44,7 +44,12 @@ static mbstate_t state; size_t c16rtomb (char *s, char16_t c16, mbstate_t *ps) { - char buf[MB_CUR_MAX]; +#if 1 + // XXX The ISO C 11 spec I have does not say anything about handling + // XXX surrogates in this interface. + return wcrtomb (s, c16, ps ?: &state); +#else + char buf[MB_LEN_MAX]; struct __gconv_step_data data; int status; size_t result; @@ -78,9 +83,9 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps) PTR_DEMANGLE (fct); #endif - /* If C16 is the NUL character we write into the output buffer the byte - sequence necessary for PS to get into the initial state, followed - by a NUL byte. */ + /* If C16 is the NUL character we write into the output buffer + the byte sequence necessary for PS to get into the initial + state, followed by a NUL byte. 
*/ if (c16 == L'\0') { status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL, @@ -96,7 +101,8 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps) status = DL_CALL_FCT (fct, (fcts->fromc16, &data, &inbuf, - inbuf + sizeof (char16_t), NULL, &dummy, 0, 1)); + inbuf + sizeof (char16_t), NULL, &dummy, + 0, 1)); } /* There must not be any problems with the conversion but illegal input @@ -118,4 +124,5 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps) } return result; +#endif } diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c index 7b5822d..df970fb 100644 --- a/wcsmbs/mbrtoc16.c +++ b/wcsmbs/mbrtoc16.c @@ -1,6 +1,6 @@ /* Copyright (C) 2011, 2012 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.org>, 2011. + Contributed by Ulrich Drepper <drepper@gmail.com>, 2011. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -43,20 +43,32 @@ static mbstate_t state; size_t mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) { - char16_t buf[1]; + if (ps == NULL) + ps = &state; + + if (ps->__count & 0x80000000) + { + /* We have to return the second word for a surrogate. */ + ps->__count &= 0x7fffffff; + *pc16 = ps->__value.__wch; + ps->__value.__wch = L'\0'; + return (size_t) -3; + } + + char16_t buf[2]; struct __gconv_step_data data; int status; size_t result; size_t dummy; const unsigned char *inbuf, *endbuf; - unsigned char *outbuf = (unsigned char *) (pc16 ?: buf); + unsigned char *outbuf = (unsigned char *) buf; const struct gconv_fcts *fcts; /* Set information for this step. */ data.__invocation_counter = 0; data.__internal_use = 1; data.__flags = __GCONV_IS_LAST; - data.__statep = ps ?: &state; + data.__statep = ps; data.__trans = NULL; /* A first special case is if S is NULL. 
This means put PS in the @@ -85,9 +97,22 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (fcts->toc16->__shlib_handle != NULL) PTR_DEMANGLE (fct); #endif + + /* We first have to check whether the character can be represented + without a surrogate. If we immediately pass in a buffer large + enough to hold two char16_t values and the first character does + not require a surrogate the routine will try to convert more + input if N is larger then needed for the first character. */ status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, NULL, &dummy, 0, 1)); + if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf) + { + data.__outbufend = outbuf + 2 * sizeof (char16_t); + status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, + NULL, &dummy, 0, 1)); + } + /* There must not be any problems with the conversion but illegal input characters. The output buffer must be large enough, otherwise the definition of MB_CUR_MAX is not correct. All the other possible @@ -100,15 +125,28 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT || status == __GCONV_FULL_OUTPUT) { - if (data.__outbuf != (unsigned char *) outbuf - && *(char16_t *) outbuf == U('\0')) + if (pc16 != NULL) + *pc16 = buf[0]; + + if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0')) { /* The converted character is the NUL character. */ assert (__mbsinit (data.__statep)); result = 0; } else - result = inbuf - (const unsigned char *) s; + { + result = inbuf - (const unsigned char *) s; + + if (data.__outbuf != outbuf + 2) + { + /* This is a surrogate. 
*/ + assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff); + assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff); + ps->__count |= 0x80000000; + ps->__value.__wch = buf[1]; + } + } } else if (status == __GCONV_INCOMPLETE_INPUT) result = (size_t) -2; diff --git a/wcsmbs/tst-c16c32-1.c b/wcsmbs/tst-c16c32-1.c new file mode 100644 index 0000000..f4534c5 --- /dev/null +++ b/wcsmbs/tst-c16c32-1.c @@ -0,0 +1,131 @@ +#include <inttypes.h> +#include <locale.h> +#include <stdio.h> +#include <uchar.h> + + +static int +do_test (void) +{ + if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL) + { + puts ("cannot set locale"); + return 1; + } + + int result = 0; + + char32_t c32 = 48; + do + { + if (c32 >= 0xd800 && c32 <= 0xe000) + continue; + + char buf[20]; + size_t n1 = c32rtomb (buf, c32, NULL); + if (n1 <= 0) + { + printf ("c32rtomb for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32); + result = 1; + continue; + } + + char32_t c32out; + size_t n2 = mbrtoc32 (&c32out, buf, n1, NULL); + if ((ssize_t) n2 < 0) + { + printf ("mbrtoc32 for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32); + result = 1; + continue; + } + if (n2 != n1) + { + printf ("mbrtoc32 for U'\\x%" PRIx32 "' consumed %zu bytes, not %zu\n", + (uint32_t) c32, n2, n1); + result = 1; + } + else if (c32out != c32) + { + printf ("mbrtoc32 for U'\\x%" PRIx32 "' produced U'\\x%" PRIx32 "\n", + (uint32_t) c32, (uint32_t) c32out); + result = 1; + } + + char16_t c16; + size_t n3 = mbrtoc16 (&c16, buf, n1, NULL); + if (n3 != n1) + { + printf ("mbrtoc16 for U'\\x%" PRIx32 "' did not consume all bytes\n", + (uint32_t) c32); + result = 1; + continue; + } + if (c32 < 0x10000) + { + if (c16 != c32) + { + printf ("mbrtoc16 for U'\\x%" PRIx32 "' produce u'\\x%" PRIx16 "'\n", + (uint32_t) c32, (uint16_t) c16); + result = 1; + continue; + } + } + else + { + buf[0] = '1'; + char16_t c16_2; + size_t n4 = mbrtoc16 (&c16_2, buf, 1, NULL); + if (n4 != (size_t) -3) + { + printf ("second mbrtoc16 for U'\\x%" PRIx32 "' did not return -3\n", + (uint32_t) c32); + 
result = 1; + continue; + } + + if (c32 != (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00)) + { + printf ("mbrtoc16 for U'\\x%" PRIx32 "' returns U'\\x%" PRIx32 "\n", + (uint32_t) c32, + (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00)); + result = 1; + continue; + } + } + + buf[0] = '\0'; + char16_t c16_nul; + n3 = mbrtoc16 (&c16_nul, buf, n1, NULL); + if (n3 != 0) + { + printf ("mbrtoc16 for '\\0' returns %zd\n", n3); + result = 1; + continue; + } + + if (c32 < 0x10000) + { + size_t n5 = c16rtomb (buf, c16, NULL); + if ((ssize_t) n5 < 0) + { + printf ("c16rtomb for U'\\x%" PRIx32 "' failed with %zd\n", + (uint32_t) c32, n5); + result = 1; + continue; + } + if (n5 != n1) + { + printf ("c16rtomb for U'\\x%" PRIx32 "' produced %zu bytes instead of %zu bytes\n", + (uint32_t) c32, n5, n1); + result = 1; + continue; + } + } + } + while ((c32 += 0x1111) <= U'\x12000'); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c index 547b05a..946fdaf 100644 --- a/wcsmbs/wcrtomb.c +++ b/wcsmbs/wcrtomb.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1996-1998,2000,2002,2005,2011 Free Software Foundation, Inc. +/* Copyright (C) 1996-1998,2000,2002,2005,2011,2012 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -38,7 +39,7 @@ static mbstate_t state; size_t __wcrtomb (char *s, wchar_t wc, mbstate_t *ps) { - char buf[MB_CUR_MAX]; + char buf[MB_LEN_MAX]; struct __gconv_step_data data; int status; size_t result; diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c index 212a6c8..9ce26f1 100644 --- a/wcsmbs/wcsmbsload.c +++ b/wcsmbs/wcsmbsload.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011 Free Software Foundation, Inc. +/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011,2012 + Free Software Foundation, Inc. This file is part of the GNU C Library. 
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -74,7 +75,7 @@ static const struct __gconv_step to_c16 = .__counter = INT_MAX, .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT", .__to_name = (char *) "UTF-16//", - .__fct = __gconv_transform_ascii_utf16, + .__fct = __gconv_transform_ascii_char16, .__btowc_fct = NULL, .__init_fct = NULL, .__end_fct = NULL, @@ -93,7 +94,7 @@ static const struct __gconv_step from_c16 = .__counter = INT_MAX, .__from_name = (char *) "UTF-16//", .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT", - .__fct = __gconv_transform_utf16_ascii, + .__fct = __gconv_transform_char16_ascii, .__btowc_fct = NULL, .__init_fct = NULL, .__end_fct = NULL, @@ -209,7 +210,7 @@ __wcsmbs_load_conv (struct __locale_data *new_category) int use_translit; /* Allocate the gconv_fcts structure. */ - new_fcts = malloc (sizeof *new_fcts); + new_fcts = calloc (1, sizeof *new_fcts); if (new_fcts == NULL) goto failed; @@ -229,16 +230,24 @@ __wcsmbs_load_conv (struct __locale_data *new_category) represent all others. */ new_fcts->towc = __wcsmbs_getfct ("INTERNAL", complete_name, &new_fcts->towc_nsteps); - new_fcts->tomb = (new_fcts->towc != NULL - ? 
__wcsmbs_getfct (complete_name, "INTERNAL", - &new_fcts->tomb_nsteps) - : NULL); + if (new_fcts->towc != NULL) + new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL", + &new_fcts->tomb_nsteps); - // XXX - new_fcts->toc16 = (struct __gconv_step *) &to_c16; - new_fcts->toc16_nsteps = 1; - new_fcts->fromc16 = (struct __gconv_step *) &from_c16; - new_fcts->fromc16_nsteps = 1; + if (new_fcts->tomb != NULL) + { + new_fcts->toc16 = __wcsmbs_getfct ("CHAR16", complete_name, + &new_fcts->toc16_nsteps); + + if (new_fcts->toc16 != NULL) + new_fcts->fromc16 = __wcsmbs_getfct (complete_name, "CHAR16", + &new_fcts->fromc16_nsteps); + else + { + __gconv_close_transform (new_fcts->toc16, new_fcts->toc16_nsteps); + new_fcts->toc16 = NULL; + } + } /* If any of the conversion functions is not available we don't use any since this would mean we cannot convert back and @@ -255,6 +264,12 @@ __wcsmbs_load_conv (struct __locale_data *new_category) } else { + // XXX At least for now we live with the CHAR16 not being available. + if (new_fcts->toc16 == NULL) + new_fcts->toc16 = __wcsmbs_gconv_fcts_c.toc16; + if (new_fcts->fromc16 == NULL) + new_fcts->fromc16 = __wcsmbs_gconv_fcts_c.fromc16; + new_category->private.ctype = new_fcts; new_category->private.cleanup = &_nl_cleanup_ctype; } @@ -277,11 +292,15 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy) *copy = *orig; /* Now increment the usage counters. - Note: This assumes copy->towc_nsteps == 1 and copy->tomb_nsteps == 1. */ + Note: This assumes copy->*_nsteps == 1. 
*/ if (copy->towc->__shlib_handle != NULL) ++copy->towc->__counter; if (copy->tomb->__shlib_handle != NULL) ++copy->tomb->__counter; + if (copy->toc16->__shlib_handle != NULL) + ++copy->toc16->__counter; + if (copy->fromc16->__shlib_handle != NULL) + ++copy->fromc16->__counter; } @@ -296,30 +315,24 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name) copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps); if (copy->tomb == NULL) - goto out_mb; - -#if 0 - copy->fromc16 = __wcsmbs_getfct (name, "UTF-16//", &copy->fromc16_nsteps); - if (copy->fromc16 == NULL) - goto out_fromc16; - - copy->toc16 = __wcsmbs_getfct ("UTF-16//", name, &copy->toc16_nsteps); - if (copy->toc16 == NULL) -#else - if (0) -#endif { -#if 0 - __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps); - out_fromc16: - __gconv_close_transform (copy->tomb, copy->tomb_nsteps); -#endif - out_mb: __gconv_close_transform (copy->towc, copy->towc_nsteps); - out_wc: return 1; } + copy->fromc16 = __wcsmbs_getfct (name, "CHAR16", &copy->fromc16_nsteps); + if (copy->fromc16 == NULL) + copy->toc16 = NULL; + else + { + copy->toc16 = __wcsmbs_getfct ("CHAR16", name, &copy->toc16_nsteps); + if (copy->toc16 == NULL) + { + __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps); + copy->fromc16 = NULL; + } + } + return 0; } @@ -335,11 +348,8 @@ _nl_cleanup_ctype (struct __locale_data *locale) /* Free the old conversions. */ __gconv_close_transform (data->tomb, data->tomb_nsteps); __gconv_close_transform (data->towc, data->towc_nsteps); -#if 0 - // XXX __gconv_close_transform (data->fromc16, data->fromc16_nsteps); - __gconv_close_transform (data->toc16, data->toc16c_nsteps); -#endif + __gconv_close_transform (data->toc16, data->toc16_nsteps); free ((char *) data); } } |