From 4e82c610255f6a186c20c73e74f8e71dcda98efc Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 4 Apr 2005 22:37:38 +0000 Subject: * intl/tst-gettext3.c: New file. * intl/tst-gettext3.sh: New file. * intl/Makefile (distribute): Add tst-gettext3.sh. (test-srcs): Add tst-gettext3. (tests): Depend on tst-gettext3.out. (tst-gettext3.out): New rule. (CFLAGS-tst-gettext3.c): New variable. Fix bug exposed by tst-gettext3. * intl/gettextP.h (struct converted_domain): New type. (struct loaded_domain): Remove the conv, conv_tab fields. Add conversions, nconversions fields. (_nl_init_domain_conv): Remove declaration. (_nl_free_domain_conv): Remove declaration. (_nl_find_msg): Add convert argument. * intl/dcigettext.c (DCIGETTEXT): Call _nl_find_msg with convert=1. (_nl_find_msg): Add convert argument. When a conversion to a different charset is needed, create a new converted_domain element, instead of throwing away the old converted translations. (get_output_charset): New function. * intl/loadmsgcat.c (_nl_init_domain_conv): Remove function. (_nl_free_domain_conv): Remove function. (_nl_load_domain): Initialize the conversions array to empty. Use _nl_find_msg instead of _nl_init_domain_conv to retrieve the header entry. (_nl_unload_domain): Free the conversions array and its contents. * intl/gettextP.h (struct loaded_domain): Remove codeset_cntr field. (struct binding): Likewise. * intl/bindtextdom.c (set_binding_values): Drop codeset_cntr modifications. --- intl/dcigettext.c | 485 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 335 insertions(+), 150 deletions(-) (limited to 'intl/dcigettext.c') diff --git a/intl/dcigettext.c b/intl/dcigettext.c index 8163064..c73c719 100644 --- a/intl/dcigettext.c +++ b/intl/dcigettext.c @@ -326,6 +326,10 @@ static struct transmem_list *transmem_list; #else typedef unsigned char transmem_block_t; #endif +#if defined _LIBC || HAVE_ICONV +static const char *get_output_charset PARAMS ((struct binding *domainbinding)) + internal_function; +#endif /* Names for the libintl functions are a problem. They must not clash @@ -597,7 +601,7 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category) if (domain != NULL) { - retval = _nl_find_msg (domain, binding, msgid1, &retlen); + retval = _nl_find_msg (domain, binding, msgid1, 1, &retlen); if (retval == NULL) { @@ -606,7 +610,7 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category) for (cnt = 0; domain->successor[cnt] != NULL; ++cnt) { retval = _nl_find_msg (domain->successor[cnt], binding, - msgid1, &retlen); + msgid1, 1, &retlen); if (retval != NULL) { @@ -683,10 +687,11 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category) char * internal_function -_nl_find_msg (domain_file, domainbinding, msgid, lengthp) +_nl_find_msg (domain_file, domainbinding, msgid, convert, lengthp) struct loaded_l10nfile *domain_file; struct binding *domainbinding; const char *msgid; + int convert; size_t *lengthp; { struct loaded_domain *domain; @@ -793,192 +798,317 @@ _nl_find_msg (domain_file, domainbinding, msgid, lengthp) } #if defined _LIBC || HAVE_ICONV - if (domain->codeset_cntr - != (domainbinding != NULL ? domainbinding->codeset_cntr : 0)) + if (convert) { - /* The domain's codeset has changed through bind_textdomain_codeset() - since the message catalog was initialized or last accessed. We - have to reinitialize the converter. */ - _nl_free_domain_conv (domain); - _nl_init_domain_conv (domain_file, domain, domainbinding); - } + /* We are supposed to do a conversion. */ + const char *encoding = get_output_charset (domainbinding); + + /* Search whether a table with converted translations for this + encoding has already been allocated. */ + size_t nconversions = domain->nconversions; + struct converted_domain *convd = NULL; + size_t i; + + for (i = nconversions; i > 0; ) + { + i--; + if (strcmp (domain->conversions[i].encoding, encoding) == 0) + { + convd = &domain->conversions[i]; + break; + } + } - if ( + if (convd == NULL) + { + /* Allocate a table for the converted translations for this + encoding. */ + struct converted_domain *new_conversions = + (struct converted_domain *) + (domain->conversions != NULL + ? realloc (domain->conversions, + (nconversions + 1) * sizeof (struct converted_domain)) + : malloc ((nconversions + 1) * sizeof (struct converted_domain))); + + if (__builtin_expect (new_conversions == NULL, 0)) + /* Nothing we can do, no more memory. */ + goto converted; + domain->conversions = new_conversions; + + /* Copy the 'encoding' string to permanent storage. */ + encoding = strdup (encoding); + if (__builtin_expect (encoding == NULL, 0)) + /* Nothing we can do, no more memory. */ + goto converted; + + convd = &new_conversions[nconversions]; + convd->encoding = encoding; + + /* Find out about the character set the file is encoded with. + This can be found (in textual form) in the entry "". If this + entry does not exist or if this does not contain the 'charset=' + information, we will assume the charset matches the one the + current locale and we don't have to perform any conversion. */ # ifdef _LIBC - domain->conv != (__gconv_t) -1 + convd->conv = (__gconv_t) -1; # else # if HAVE_ICONV - domain->conv != (iconv_t) -1 + convd->conv = (iconv_t) -1; # endif # endif - ) - { - /* We are supposed to do a conversion. First allocate an - appropriate table with the same structure as the table - of translations in the file, where we can put the pointers - to the converted strings in. - There is a slight complication with plural entries. They - are represented by consecutive NUL terminated strings. We - handle this case by converting RESULTLEN bytes, including - NULs. */ - - if (domain->conv_tab == NULL - && ((domain->conv_tab = - (char **) calloc (nstrings + domain->n_sysdep_strings, - sizeof (char *))) - == NULL)) - /* Mark that we didn't succeed allocating a table. */ - domain->conv_tab = (char **) -1; - - if (__builtin_expect (domain->conv_tab == (char **) -1, 0)) - /* Nothing we can do, no more memory. */ - goto converted; - - if (domain->conv_tab[act] == NULL) + { + char *nullentry; + size_t nullentrylen; + + /* Get the header entry. This is a recursion, but it doesn't + reallocate domain->conversions because we pass convert = 0. */ + nullentry = + _nl_find_msg (domain_file, domainbinding, "", 0, &nullentrylen); + + if (nullentry != NULL) + { + const char *charsetstr; + + charsetstr = strstr (nullentry, "charset="); + if (charsetstr != NULL) + { + size_t len; + char *charset; + const char *outcharset; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + + charset = (char *) alloca (len + 1); +# if defined _LIBC || HAVE_MEMPCPY + *((char *) mempcpy (charset, charsetstr, len)) = '\0'; +# else + memcpy (charset, charsetstr, len); + charset[len] = '\0'; +# endif + + outcharset = encoding; + +# ifdef _LIBC + /* We always want to use transliteration. */ + outcharset = norm_add_slashes (outcharset, "TRANSLIT"); + charset = norm_add_slashes (charset, ""); + if (__gconv_open (outcharset, charset, &convd->conv, + GCONV_AVOID_NOCONV) + != __GCONV_OK) + convd->conv = (__gconv_t) -1; +# else +# if HAVE_ICONV + /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5, + we want to use transliteration. */ +# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \ + || _LIBICONV_VERSION >= 0x0105 + if (strchr (outcharset, '/') == NULL) + { + char *tmp; + + len = strlen (outcharset); + tmp = (char *) alloca (len + 10 + 1); + memcpy (tmp, outcharset, len); + memcpy (tmp + len, "//TRANSLIT", 10 + 1); + outcharset = tmp; + + convd->conv = iconv_open (outcharset, charset); + + freea (outcharset); + } + else +# endif + convd->conv = iconv_open (outcharset, charset); +# endif +# endif + + freea (charset); + } + } + } + convd->conv_tab = NULL; + /* Here domain->conversions is still == new_conversions. */ + domain->nconversions++; + } + + if ( +# ifdef _LIBC + convd->conv != (__gconv_t) -1 +# else +# if HAVE_ICONV + convd->conv != (iconv_t) -1 +# endif +# endif + ) { - /* We haven't used this string so far, so it is not - translated yet. Do this now. */ - /* We use a bit more efficient memory handling. - We allocate always larger blocks which get used over - time. This is faster than many small allocations. */ - __libc_lock_define_initialized (static, lock) + /* We are supposed to do a conversion. First allocate an + appropriate table with the same structure as the table + of translations in the file, where we can put the pointers + to the converted strings in. + There is a slight complication with plural entries. They + are represented by consecutive NUL terminated strings. We + handle this case by converting RESULTLEN bytes, including + NULs. */ + + if (convd->conv_tab == NULL + && ((convd->conv_tab = + (char **) calloc (nstrings + domain->n_sysdep_strings, + sizeof (char *))) + == NULL)) + /* Mark that we didn't succeed allocating a table. */ + convd->conv_tab = (char **) -1; + + if (__builtin_expect (convd->conv_tab == (char **) -1, 0)) + /* Nothing we can do, no more memory. */ + goto converted; + + if (convd->conv_tab[act] == NULL) + { + /* We haven't used this string so far, so it is not + translated yet. Do this now. */ + /* We use a bit more efficient memory handling. + We allocate always larger blocks which get used over + time. This is faster than many small allocations. */ + __libc_lock_define_initialized (static, lock) # define INITIAL_BLOCK_SIZE 4080 - static unsigned char *freemem; - static size_t freemem_size; + static unsigned char *freemem; + static size_t freemem_size; - const unsigned char *inbuf; - unsigned char *outbuf; - int malloc_count; + const unsigned char *inbuf; + unsigned char *outbuf; + int malloc_count; # ifndef _LIBC - transmem_block_t *transmem_list = NULL; + transmem_block_t *transmem_list = NULL; # endif - __libc_lock_lock (lock); + __libc_lock_lock (lock); - inbuf = (const unsigned char *) result; - outbuf = freemem + sizeof (size_t); + inbuf = (const unsigned char *) result; + outbuf = freemem + sizeof (size_t); - malloc_count = 0; - while (1) - { - transmem_block_t *newmem; + malloc_count = 0; + while (1) + { + transmem_block_t *newmem; # ifdef _LIBC - size_t non_reversible; - int res; + size_t non_reversible; + int res; - if (freemem_size < sizeof (size_t)) - goto resize_freemem; + if (freemem_size < sizeof (size_t)) + goto resize_freemem; - res = __gconv (domain->conv, - &inbuf, inbuf + resultlen, - &outbuf, - outbuf + freemem_size - sizeof (size_t), - &non_reversible); + res = __gconv (convd->conv, + &inbuf, inbuf + resultlen, + &outbuf, + outbuf + freemem_size - sizeof (size_t), + &non_reversible); - if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT) - break; + if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT) + break; - if (res != __GCONV_FULL_OUTPUT) - { - __libc_lock_unlock (lock); - goto converted; - } + if (res != __GCONV_FULL_OUTPUT) + { + __libc_lock_unlock (lock); + goto converted; + } - inbuf = (const unsigned char *) result; + inbuf = (const unsigned char *) result; # else # if HAVE_ICONV - const char *inptr = (const char *) inbuf; - size_t inleft = resultlen; - char *outptr = (char *) outbuf; - size_t outleft; - - if (freemem_size < sizeof (size_t)) - goto resize_freemem; - - outleft = freemem_size - sizeof (size_t); - if (iconv (domain->conv, - (ICONV_CONST char **) &inptr, &inleft, - &outptr, &outleft) - != (size_t) (-1)) - { - outbuf = (unsigned char *) outptr; - break; - } - if (errno != E2BIG) - { - __libc_lock_unlock (lock); - goto converted; - } + const char *inptr = (const char *) inbuf; + size_t inleft = resultlen; + char *outptr = (char *) outbuf; + size_t outleft; + + if (freemem_size < sizeof (size_t)) + goto resize_freemem; + + outleft = freemem_size - sizeof (size_t); + if (iconv (convd->conv, + (ICONV_CONST char **) &inptr, &inleft, + &outptr, &outleft) + != (size_t) (-1)) + { + outbuf = (unsigned char *) outptr; + break; + } + if (errno != E2BIG) + { + __libc_lock_unlock (lock); + goto converted; + } # endif # endif - resize_freemem: - /* We must allocate a new buffer or resize the old one. */ - if (malloc_count > 0) - { - ++malloc_count; - freemem_size = malloc_count * INITIAL_BLOCK_SIZE; - newmem = (transmem_block_t *) realloc (transmem_list, - freemem_size); + resize_freemem: + /* We must allocate a new buffer or resize the old one. */ + if (malloc_count > 0) + { + ++malloc_count; + freemem_size = malloc_count * INITIAL_BLOCK_SIZE; + newmem = (transmem_block_t *) realloc (transmem_list, + freemem_size); # ifdef _LIBC - if (newmem != NULL) - transmem_list = transmem_list->next; + if (newmem != NULL) + transmem_list = transmem_list->next; + else + { + struct transmem_list *old = transmem_list; + + transmem_list = transmem_list->next; + free (old); + } +# endif + } else { - struct transmem_list *old = transmem_list; - - transmem_list = transmem_list->next; - free (old); + malloc_count = 1; + freemem_size = INITIAL_BLOCK_SIZE; + newmem = (transmem_block_t *) malloc (freemem_size); + } + if (__builtin_expect (newmem == NULL, 0)) + { + freemem = NULL; + freemem_size = 0; + __libc_lock_unlock (lock); + goto converted; } -# endif - } - else - { - malloc_count = 1; - freemem_size = INITIAL_BLOCK_SIZE; - newmem = (transmem_block_t *) malloc (freemem_size); - } - if (__builtin_expect (newmem == NULL, 0)) - { - freemem = NULL; - freemem_size = 0; - __libc_lock_unlock (lock); - goto converted; - } # ifdef _LIBC - /* Add the block to the list of blocks we have to free - at some point. */ - newmem->next = transmem_list; - transmem_list = newmem; + /* Add the block to the list of blocks we have to free + at some point. */ + newmem->next = transmem_list; + transmem_list = newmem; - freemem = (unsigned char *) newmem->data; - freemem_size -= offsetof (struct transmem_list, data); + freemem = (unsigned char *) newmem->data; + freemem_size -= offsetof (struct transmem_list, data); # else - transmem_list = newmem; - freemem = newmem; + transmem_list = newmem; + freemem = newmem; # endif - outbuf = freemem + sizeof (size_t); + outbuf = freemem + sizeof (size_t); + } + + /* We have now in our buffer a converted string. Put this + into the table of conversions. */ + *(size_t *) freemem = outbuf - freemem - sizeof (size_t); + convd->conv_tab[act] = (char *) freemem; + /* Shrink freemem, but keep it aligned. */ + freemem_size -= outbuf - freemem; + freemem = outbuf; + freemem += freemem_size & (alignof (size_t) - 1); + freemem_size = freemem_size & ~ (alignof (size_t) - 1); + + __libc_lock_unlock (lock); } - /* We have now in our buffer a converted string. Put this - into the table of conversions. */ - *(size_t *) freemem = outbuf - freemem - sizeof (size_t); - domain->conv_tab[act] = (char *) freemem; - /* Shrink freemem, but keep it aligned. */ - freemem_size -= outbuf - freemem; - freemem = outbuf; - freemem += freemem_size & (alignof (size_t) - 1); - freemem_size = freemem_size & ~ (alignof (size_t) - 1); - - __libc_lock_unlock (lock); + /* Now convd->conv_tab[act] contains the translation of all + the plural variants. */ + result = convd->conv_tab[act] + sizeof (size_t); + resultlen = *(size_t *) convd->conv_tab[act]; } - - /* Now domain->conv_tab[act] contains the translation of all - the plural variants. */ - result = domain->conv_tab[act] + sizeof (size_t); - resultlen = *(size_t *) domain->conv_tab[act]; } converted: @@ -1122,6 +1252,61 @@ guess_category_value (category, categoryname) return language != NULL && strcmp (retval, "C") != 0 ? language : retval; } +#if defined _LIBC || HAVE_ICONV +/* Returns the output charset. */ +static const char * +internal_function +get_output_charset (domainbinding) + struct binding *domainbinding; +{ + /* The output charset should normally be determined by the locale. But + sometimes the locale is not used or not correctly set up, so we provide + a possibility for the user to override this: the OUTPUT_CHARSET + environment variable. Moreover, the value specified through + bind_textdomain_codeset overrides both. */ + if (domainbinding != NULL && domainbinding->codeset != NULL) + return domainbinding->codeset; + else + { + /* For speed reasons, we look at the value of OUTPUT_CHARSET only + once. This is a user variable that is not supposed to change + during a program run. */ + static char *output_charset_cache; + static int output_charset_cached; + + if (!output_charset_cached) + { + const char *value = getenv ("OUTPUT_CHARSET"); + + if (value != NULL && value[0] != '\0') + { + size_t len = strlen (value) + 1; + char *value_copy = (char *) malloc (len); + + if (value_copy != NULL) + memcpy (value_copy, value, len); + output_charset_cache = value_copy; + } + output_charset_cached = 1; + } + + if (output_charset_cache != NULL) + return output_charset_cache; + else + { +# ifdef _LIBC + return _NL_CURRENT (LC_CTYPE, CODESET); +# else +# if HAVE_ICONV + extern const char *locale_charset PARAMS ((void); + return locale_charset (); +# endif +# endif + } + } +} +#endif + /* @@ begin of epilog @@ */ /* We don't want libintl.a to depend on any other library. So we -- cgit v1.1