diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2010-01-22 22:31:31 +0000 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2010-01-22 22:31:31 +0000 |
commit | 326fb376dda24e2c3f445863791eb6073d91d69b (patch) | |
tree | 6f272a713b5deab3bead327e258592b887f3d528 /winsup/cygwin/nlsfuncs.cc | |
parent | e894eef9f5bfa989029423afe729da554df8d17e (diff) | |
download | newlib-326fb376dda24e2c3f445863791eb6073d91d69b.zip newlib-326fb376dda24e2c3f445863791eb6073d91d69b.tar.gz newlib-326fb376dda24e2c3f445863791eb6073d91d69b.tar.bz2 |
* Makefile.in (DLL_OFILES): Add nlsfuncs.o and strfmon.o.
* autoload.cc (LocaleNameToLCID): Define.
* cygwin.din (strfmon): Export.
* nlsfuncs.cc: New file. Define a lot of internal functions called
from setlocale.
(wcscoll): Implement locale-aware here, using CompareStringW function.
(strcoll): Ditto.
(wcsxfrm): Implement locale-aware here, using LCMapStringW function.
(strxfrm): Ditto.
(__set_charset_from_locale): Replace __set_charset_from_codepage.
Return Linux-compatible charset.
* strfuncs.cc (__set_charset_from_codepage): Remove.
* wchar.h (__set_charset_from_codepage): Drop definition.
* wincap.h (wincaps::has_localenames): New element.
* wincap.cc: Implement above element throughout.
* libc/strfmon.c: New file.
* libc/strptime.cc: Remove locale constant strings in favor of
access to locale-specific data.
(strptime): Point _CurrentTimeLocale to locale-specific data.
Throughout use correct locale-specific format fields for all
locale-specific formats.
* include/monetary.h: New file.
* include/cygwin/version.h (CYGWIN_VERSION_API_MINOR): Bump.
Diffstat (limited to 'winsup/cygwin/nlsfuncs.cc')
-rw-r--r-- | winsup/cygwin/nlsfuncs.cc | 764 |
1 files changed, 764 insertions, 0 deletions
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc new file mode 100644 index 0000000..073aa13 --- /dev/null +++ b/winsup/cygwin/nlsfuncs.cc @@ -0,0 +1,764 @@ +/* nlsfuncs.cc: NLS helper functions + + Copyright 2010 Red Hat, Inc. + +This file is part of Cygwin. + +This software is a copyrighted work licensed under the terms of the +Cygwin license. Please consult the file "CYGWIN_LICENSE" for +details. */ + +#include "winsup.h" +#include <stdlib.h> +#include <winnls.h> +#include <wchar.h> +#include "path.h" +#include "fhandler.h" +#include "dtable.h" +#include "cygheap.h" +#include "tls_pbuf.h" +/* Internal headers from newlib */ +#include "../locale/timelocal.h" +#include "../locale/lnumeric.h" +#include "../locale/lmonetary.h" + +static char *lc_time_buf; +static char *lc_numeric_buf; +static char *lc_monetary_buf; + +#define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr + +#define getlocaleinfo(category,type) \ + __getlocaleinfo(lcid,(type),_LC(category),f_wctomb,charset) +#define eval_datetimefmt(type,force) \ + __eval_datetimefmt(lcid,(type),(force),&lc_time_ptr,\ + lc_time_end-lc_time_ptr,f_wctomb, charset) + +/* Vista and later. Not defined in w32api yet. */ +extern "C" { +WINBASEAPI LCID WINAPI LocaleNameToLCID (LPCWSTR, DWORD); +}; + +static char last_locale[ENCODING_LEN + 1]; +static LCID last_lcid; + +/* Fetch LCID from POSIX locale specifier. + Return values: + + -1: Invalid locale + 0: C or POSIX + >0: LCID +*/ +static LCID +__get_lcid_from_locale (const char *name) +{ + char locale[ENCODING_LEN + 1]; + char *c; + LCID lcid; + + if (!strcmp (name, last_locale)) + { + debug_printf ("LCID=0x%04x", last_lcid); + return last_lcid; + } + stpcpy (last_locale, name); + stpcpy (locale, name); + /* Drop charset and modifier */ + c = strchr (locale, '.'); + if (!c) + c = strchr (locale, '@'); + if (c) + *c = '\0'; + /* "POSIX" already converted to "C" in loadlocale. 
*/ + if (!strcmp (locale, "C")) + return 0; + /* Convert to form understood by LocaleNameToLCID */ + c = strchr (locale, '_'); + if (c) + *c = '-'; + if (wincap.has_localenames ()) + { + wchar_t wlocale[ENCODING_LEN + 1]; + mbstowcs (wlocale, locale, ENCODING_LEN + 1); + lcid = LocaleNameToLCID (wlocale, 0); + last_lcid = lcid ?: (LCID) -1; + debug_printf ("LCID=0x%04x", last_lcid); + return last_lcid; + } + /* Pre-Vista we have to loop through the LCID values and see if they + match language and TERRITORY. */ + if (c) + *c++ = '\0'; + /* locale now points to the language, c points to the TERRITORY */ + const char *language = locale; + const char *territory = c; + LCID lang, sublang; + char iso[10]; + + /* In theory the lang part takes 10 bits (0x3ff), but up to Windows 2003 R2 + the highest lang value is 0x81. */ + for (lang = 1; lang <= 0x81; ++lang) + if (GetLocaleInfo (lang, LOCALE_SISO639LANGNAME, iso, 10) + && !strcmp (language, iso)) + break; + if (lang > 0x81) + lcid = 0; + else if (!territory) + lcid = lang; + else + { + /* In theory the sublang part takes 7 bits (0x3f), but up to + Windows 2003 R2 the highest sublang value is 0x14. */ + for (sublang = 1; sublang <= 0x14; ++sublang) + { + lcid = (sublang << 10) | lang; + if (GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso, 10) + && !strcmp (territory, iso)) + break; + } + if (sublang > 0x14) + lcid = 0; + } + last_lcid = lcid ?: (LCID) -1; + debug_printf ("LCID=0x%04x", last_lcid); + return last_lcid; +} + +/* Never returns -1, *iff* s is not NULL. Just skips invalid chars + instead. s==NULL returns -1 since it's used to recognize invalid + strings in the used charset. 
*/ +static size_t +lc_wcstombs (wctomb_p f_wctomb, const char *charset, + char *s, const wchar_t *pwcs, size_t n) +{ + char *ptr = s; + size_t max = n; + char buf[8]; + size_t i, bytes, num_to_copy; + mbstate_t state; + + memset (&state, 0, sizeof state); + if (s == NULL) + { + size_t num_bytes = 0; + while (*pwcs != 0) + { + bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state); + if (bytes == (size_t) -1) + return (size_t) -1; + num_bytes += bytes; + } + return num_bytes; + } + while (n > 0) + { + bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state); + if (bytes == (size_t) -1) + { + memset (&state, 0, sizeof state); + ++pwcs; + continue; + } + num_to_copy = (n > bytes ? bytes : n); + for (i = 0; i < num_to_copy; ++i) + *ptr++ = buf[i]; + + if (*pwcs == 0x00) + return ptr - s - (n >= bytes); + ++pwcs; + n -= num_to_copy; + } + return max; +} + +/* Never returns -1. Invalid sequences are translated to replacement + wide-chars. */ +static size_t +lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset, + wchar_t *pwcs, const char *s, size_t n) +{ + size_t ret = 0; + char *t = (char *) s; + size_t bytes; + mbstate_t state; + + memset (&state, 0, sizeof state); + if (!pwcs) + n = 1; + while (n > 0) + { + bytes = f_mbtowc (_REENT, pwcs, t, MB_CUR_MAX, charset, &state); + if (bytes == (size_t) -1) + { + state.__count = 0; + bytes = 1; + if (pwcs) + *pwcs = L' '; + } + else if (bytes == 0) + break; + t += bytes; + ++ret; + if (pwcs) + { + ++pwcs; + --n; + } + } + return ret; +} + +static char * +__getlocaleinfo (LCID lcid, LCTYPE type, char **ptr, size_t size, + wctomb_p f_wctomb, const char *charset) +{ + wchar_t wbuf[80]; + size_t num; + char *ret; + + GetLocaleInfoW (lcid, type, wbuf, 80); + num = lc_wcstombs (f_wctomb, charset, ret = *ptr, wbuf, size); + *ptr += num + 1; + return ret; +} + +static UINT +getlocaleint (LCID lcid, LCTYPE type) +{ + UINT val; + return GetLocaleInfoW (lcid, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val, + sizeof val) ? 
val : 0; +} + +static char * +__eval_datetimefmt (LCID lcid, LCTYPE type, int force, char **ptr, + size_t size, wctomb_p f_wctomb, const char *charset) +{ + wchar_t buf[80]; + wchar_t fc; + size_t num; + mbstate_t mb; + size_t idx; + const char *day_str = "edaA"; + const char *mon_str = "mmbB"; + const char *year_str = "yyyY"; + const char *hour12_str = "lI"; + const char *hour24_str = "kH"; + const char *t_str; + char *ret = *ptr; + char *p = *ptr; + + GetLocaleInfoW (lcid, type, buf, 80); + memset (&mb, 0, sizeof mb); + for (wchar_t *fmt = buf; *fmt; ++fmt) + switch (fc = *fmt) + { + case L'\'': + if (fmt[1] == L'\'') + *p++ = '\''; + else + while (fmt[1] && *++fmt != L'\'') + { + num = f_wctomb (_REENT, p, *fmt, charset, &mb); + if (num == (size_t) -1) + memset (&mb, 0, sizeof mb); + else + p += num; + } + break; + case L'd': + case L'M': + case L'y': + t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str); + if (fc == L'y') + force = 0; + for (idx = 1; fmt[1] == fc; ++idx, ++fmt); + if (--idx > 3) + idx = 3; + if (force && idx == 3) + idx = 2; + *p++ = '%'; + *p++ = t_str[idx]; + break; + case L'g': + break; + case L'h': + case L'H': + t_str = (fc == L'h' || force ? hour12_str : hour24_str); + idx = 0; + if (fmt[1] == fc) + { + ++fmt; + idx = 1; + } + *p++ = '%'; + *p++ = t_str[idx]; + break; + case L'm': + case L's': + case L't': + if (fmt[1] == fc) + ++fmt; + *p++ = '%'; + *p++ = (fc == L'm' ? 'M' : fc == L's' ? 'S' : 'p'); + break; + case L'\t': + case L'\n': + case L'%': + *p++ = '%'; + *p++ = (char) fc; + break; + default: + num = f_wctomb (_REENT, p, *fmt, charset, &mb); + if (num == (size_t) -1) + memset (&mb, 0, sizeof mb); + else + p += num; + break; + } + *p++ = '\0'; + *ptr = p; + return ret; +} + +/* Convert Windows grouping format into POSIX grouping format. */ +static char * +conv_grouping (LCID lcid, LCTYPE type, char **lc_ptr) +{ + char buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. 
NUL */ + bool repeat = false; + char *ptr = *lc_ptr; + char *ret = ptr; + + GetLocaleInfoA (lcid, type, buf, 10); + /* Convert Windows grouping format into POSIX grouping format. */ + for (char *c = buf; *c; ++c) + { + if (*c < '0' || *c > '9') + continue; + char val = *c - '0'; + if (!val) + { + repeat = true; + break; + } + *ptr++ = val; + } + if (!repeat) + *ptr++ = CHAR_MAX; + *ptr++ = '\0'; + *lc_ptr = ptr; + return ret; +} + +/* Called from newlib's setlocale() via __time_load_locale() if category + is LC_TIME. Returns LC_TIME values fetched from Windows locale data + in the structure pointed to by _time_locale. This is subsequently + accessed by functions like nl_langinfo, strftime, strptime. */ +extern "C" int +__set_lc_time_from_win (const char *name, struct lc_time_T *_time_locale, + wctomb_p f_wctomb, const char *charset) +{ + LCID lcid = __get_lcid_from_locale (name); + if (!lcid || lcid == (LCID) -1) + return lcid; + + char *new_lc_time_buf = (char *) malloc (4096); + const char *lc_time_end = new_lc_time_buf + 4096; + + if (!new_lc_time_buf) + return -1; + char *lc_time_ptr = new_lc_time_buf; + /* mon */ + for (int i = 0; i < 12; ++i) + _time_locale->mon[i] = getlocaleinfo (time, LOCALE_SABBREVMONTHNAME1 + i); + /* month and alt_month */ + for (int i = 0; i < 12; ++i) + _time_locale->month[i] = _time_locale->alt_month[i] + = getlocaleinfo (time, LOCALE_SMONTHNAME1 + i); + /* wday */ + _time_locale->wday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7); + for (int i = 0; i < 6; ++i) + _time_locale->wday[i + 1] = getlocaleinfo (time, + LOCALE_SABBREVDAYNAME1 + i); + /* weekday */ + _time_locale->weekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7); + for (int i = 0; i < 6; ++i) + _time_locale->weekday[i + 1] = getlocaleinfo (time, LOCALE_SDAYNAME1 + i); + /* X_fmt */ + _time_locale->X_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, 0); + /* x_fmt */ + _time_locale->x_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, 0); + /* c_fmt */ + _time_locale->c_fmt = 
eval_datetimefmt (LOCALE_SLONGDATE, 1); + --lc_time_ptr; + *lc_time_ptr++ = ' '; + eval_datetimefmt (LOCALE_STIMEFORMAT, 0); + /* AM/PM */ + _time_locale->am_pm[0] = getlocaleinfo (time, LOCALE_S1159); + _time_locale->am_pm[1] = getlocaleinfo (time, LOCALE_S2359); + /* date_fmt */ + _time_locale->date_fmt = eval_datetimefmt (LOCALE_SLONGDATE, 1); + --lc_time_ptr; + *lc_time_ptr++ = ' '; + eval_datetimefmt (LOCALE_STIMEFORMAT, 0); + --lc_time_ptr; + lc_time_ptr = stpcpy (lc_time_ptr, " %Z") + 1; + /* md */ + { + wchar_t buf[80]; + GetLocaleInfoW (lcid, LOCALE_IDATE, buf, 80); + lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1; + } + /* ampm_fmt */ + _time_locale->ampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, 1); + + char *tmp = (char *) realloc (new_lc_time_buf, lc_time_ptr - new_lc_time_buf); + if (!tmp) + { + free (new_lc_time_buf); + return -1; + } + if (lc_time_buf) + free (lc_time_buf); + lc_time_buf = tmp; + return 1; +} + +/* Called from newlib's setlocale() via __numeric_load_locale() if category + is LC_NUMERIC. Returns LC_NUMERIC values fetched from Windows locale data + in the structure pointed to by _numeric_locale. This is subsequently + accessed by functions like nl_langinfo, localeconv, printf, etc. 
*/ +extern "C" int +__set_lc_numeric_from_win (const char *name, + struct lc_numeric_T *_numeric_locale, + wctomb_p f_wctomb, const char *charset) +{ + LCID lcid = __get_lcid_from_locale (name); + if (!lcid || lcid == (LCID) -1) + return lcid; + + char *new_lc_numeric_buf = (char *) malloc (48); + const char *lc_numeric_end = new_lc_numeric_buf + 48; + + if (!new_lc_numeric_buf) + return -1; + char *lc_numeric_ptr = new_lc_numeric_buf; + /* decimal_point */ + _numeric_locale->decimal_point = getlocaleinfo (numeric, + LOCALE_SDECIMAL); + /* thousands_sep */ + _numeric_locale->thousands_sep = getlocaleinfo (numeric, + LOCALE_STHOUSAND); + /* grouping */ + _numeric_locale->grouping = conv_grouping (lcid, LOCALE_SGROUPING, + &lc_numeric_ptr); + + char *tmp = (char *) realloc (new_lc_numeric_buf, + lc_numeric_ptr - new_lc_numeric_buf); + if (!tmp) + { + free (new_lc_numeric_buf); + return -1; + } + if (lc_numeric_buf) + free (lc_numeric_buf); + lc_numeric_buf = tmp; + return 1; +} + +/* Called from newlib's setlocale() via __monetary_load_locale() if category + is LC_MONETARY. Returns LC_MONETARY values fetched from Windows locale data + in the structure pointed to by _monetary_locale. This is subsequently + accessed by functions like nl_langinfo, localeconv, printf, etc. */ +extern "C" int +__set_lc_monetary_from_win (const char *name, + struct lc_monetary_T *_monetary_locale, + wctomb_p f_wctomb, const char *charset) +{ + LCID lcid = __get_lcid_from_locale (name); + if (!lcid || lcid == (LCID) -1) + return lcid; + + char *new_lc_monetary_buf = (char *) malloc (256); + const char *lc_monetary_end = new_lc_monetary_buf + 256; + + if (!new_lc_monetary_buf) + return -1; + char *lc_monetary_ptr = new_lc_monetary_buf; + /* int_curr_symbol */ + _monetary_locale->int_curr_symbol = getlocaleinfo (monetary, + LOCALE_SINTLSYMBOL); + /* No spacing char means space. 
*/ + if (!_monetary_locale->int_curr_symbol[3]) + { + lc_monetary_ptr[-1] = ' '; + *lc_monetary_ptr++ = '\0'; + } + /* currency_symbol */ + { + /* As on Linux: If the currency_symbol can't be represented in the + given charset, use int_curr_symbol. */ + wchar_t wbuf[14]; + GetLocaleInfoW (lcid, LOCALE_SCURRENCY, wbuf, 14); + if (lc_wcstombs (f_wctomb, charset, NULL, wbuf, 0) == (size_t) -1) + { + _monetary_locale->currency_symbol = lc_monetary_ptr; + lc_monetary_ptr = stpncpy (lc_monetary_ptr, + _monetary_locale->int_curr_symbol, 3); + *lc_monetary_ptr++ = '\0'; + } + else + _monetary_locale->currency_symbol = getlocaleinfo (monetary, + LOCALE_SCURRENCY); + } + /* mon_decimal_point */ + _monetary_locale->mon_decimal_point = getlocaleinfo (monetary, + LOCALE_SMONDECIMALSEP); + /* mon_thousands_sep */ + _monetary_locale->mon_thousands_sep = getlocaleinfo (monetary, + LOCALE_SMONTHOUSANDSEP); + /* mon_grouping */ + _monetary_locale->mon_grouping = conv_grouping (lcid, LOCALE_SMONGROUPING, + &lc_monetary_ptr); + /* positive_sign */ + _monetary_locale->positive_sign = getlocaleinfo (monetary, + LOCALE_SPOSITIVESIGN); + /* negative_sign */ + _monetary_locale->negative_sign = getlocaleinfo (monetary, + LOCALE_SNEGATIVESIGN); + /* int_frac_digits */ + *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IINTLCURRDIGITS); + _monetary_locale->int_frac_digits = lc_monetary_ptr++; + /* frac_digits */ + *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_ICURRDIGITS); + _monetary_locale->frac_digits = lc_monetary_ptr++; + /* p_cs_precedes */ + *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSYMPRECEDES); + _monetary_locale->p_cs_precedes = lc_monetary_ptr++; + /* p_sep_by_space */ + *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSEPBYSPACE); + _monetary_locale->p_sep_by_space = lc_monetary_ptr++; + /* n_cs_precedes */ + *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSYMPRECEDES); + _monetary_locale->n_cs_precedes = lc_monetary_ptr++; + /* 
n_sep_by_space */ + *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSEPBYSPACE); + _monetary_locale->n_sep_by_space = lc_monetary_ptr++; + /* p_sign_posn */ + *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSIGNPOSN); + _monetary_locale->p_sign_posn = lc_monetary_ptr++; + /* p_sign_posn */ + *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSIGNPOSN); + _monetary_locale->n_sign_posn = lc_monetary_ptr++; + + char *tmp = (char *) realloc (new_lc_monetary_buf, + lc_monetary_ptr - new_lc_monetary_buf); + if (!tmp) + { + free (new_lc_monetary_buf); + return -1; + } + if (lc_monetary_buf) + free (lc_monetary_buf); + lc_monetary_buf = tmp; + return 1; +} + +static LCID collate_lcid = 0; +static mbtowc_p collate_mbtowc = __ascii_mbtowc; +static char collate_charset[ENCODING_LEN + 1] = "ASCII"; + +/* Called from newlib's setlocale() if category is LC_COLLATE. Stores + LC_COLLATE locale information. This is subsequently accessed by the + below functions strcoll, strxfrm, wcscoll, wcsxfrm. */ +extern "C" int +__collate_load_locale (const char *name, mbtowc_p f_mbtowc, const char *charset) +{ + LCID lcid = __get_lcid_from_locale (name); + if (lcid == (LCID) -1) + return -1; + collate_lcid = lcid; + collate_mbtowc = f_mbtowc; + stpcpy (collate_charset, charset); + return 0; +} + +/* We use the Windows functions for locale-specific string comparison and + transformation. The advantage is that we don't need any files with + collation information. 
*/ +extern "C" int +wcscoll (const wchar_t *ws1, const wchar_t *ws2) +{ + int ret; + + if (!collate_lcid) + return wcscmp (ws1, ws2); + ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1); + if (!ret) + set_errno (EINVAL); + return ret - CSTR_EQUAL; +} + +extern "C" int +strcoll (const char *s1, const char *s2) +{ + size_t n1, n2; + wchar_t *ws1, *ws2; + tmp_pathbuf tp; + int ret; + + if (!collate_lcid) + return strcmp (s1, s2); + /* The ANSI version of CompareString uses the default charset of the lcid, + so we must use the Unicode version. */ + n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1; + ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t)) + : tp.w_get ()); + lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1); + n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1; + ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t)) + : tp.w_get ()); + lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2); + ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1); + if (n1 > NT_MAX_PATH) + free (ws1); + if (n2 > NT_MAX_PATH) + free (ws2); + if (!ret) + set_errno (EINVAL); + return ret - CSTR_EQUAL; +} + +extern "C" size_t +wcsxfrm (wchar_t *ws1, const wchar_t *ws2, size_t wsn) +{ + size_t ret; + + if (!collate_lcid) + return wcslcpy (ws1, ws2, wsn); + ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, + ws2, -1, ws1, wsn * sizeof (wchar_t)); + /* LCMapStringW returns byte count including the terminating NUL character, + wcsxfrm is supposed to return length in wchar_t excluding the NUL. + Since the array is only single byte NUL-terminated we must make sure + the result is wchar_t-NUL terminated. 
*/ + if (ret) + { + ret = (ret + 1) / sizeof (wchar_t); + if (ret >= wsn) + return wsn; + ws1[ret] = L'\0'; + return ret; + } + if (GetLastError () != ERROR_INSUFFICIENT_BUFFER) + set_errno (EINVAL); + return wsn; +} + +extern "C" size_t +strxfrm (char *s1, const char *s2, size_t sn) +{ + size_t ret; + size_t n2; + wchar_t *ws2; + tmp_pathbuf tp; + + if (!collate_lcid) + return strlcpy (s1, s2, sn); + /* The ANSI version of LCMapString uses the default charset of the lcid, + so we must use the Unicode version. */ + n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1; + ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t)) + : tp.w_get ()); + lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2); + /* The sort key is a NUL-terminated byte string. */ + ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, (PWCHAR) s1, sn); + if (n2 > NT_MAX_PATH) + free (ws2); + if (ret == 0) + { + if (GetLastError () != ERROR_INSUFFICIENT_BUFFER) + set_errno (EINVAL); + return sn; + } + /* LCMapStringW returns byte count including the terminating NUL character. + strxfrm is supposed to return length excluding the NUL. */ + return ret - 1; +} + +/* Fetch default ANSI codepage from locale info and generate a setlocale + compatible character set code. Called from newlib's setlocale(), if the + charset isn't given explicitely in the POSIX compatible locale specifier. + The function also returns a pointer to the corresponding _mbtowc_r function + which is used subsequently. */ +extern "C" void +__set_charset_from_locale (const char *locale, char *charset) +{ + UINT cp; + LCID lcid = __get_lcid_from_locale (locale); + + /* "C" locale, or invalid locale? */ + if (lcid == 0 || lcid == (LCID) -1) + { + __small_sprintf (charset, "ASCII"); + return; + } + if (!GetLocaleInfoW (lcid, + LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, + (PWCHAR) &cp, sizeof cp)) + cp = 0; + /* codepage to de-facto standard charset transition. 
*/ + switch (cp) + { + case 874: + __small_sprintf (charset, "CP%u", cp); + break; + case 932: + strcpy (charset, "EUCJP"); + break; + case 936: + strcpy (charset, "GBK"); + break; + case 949: + strcpy (charset, "EUCKR"); + break; + case 950: + strcpy (charset, "BIG5"); + break; + case 1250: + strcpy (charset, "ISO-8859-2"); + break; + case 1251: + strcpy (charset, "ISO-8859-5"); + break; + case 1252: + strcpy (charset, "ISO-8859-1"); + break; + case 1253: + strcpy (charset, "ISO-8859-7"); + break; + case 1254: + strcpy (charset, "ISO-8859-9"); + break; + case 1255: + strcpy (charset, "ISO-8859-8"); + break; + case 1256: + strcpy (charset, "ISO-8859-6"); + break; + case 1257: + strcpy (charset, "ISO-8859-13"); + break; + case 1258: + default: + strcpy (charset, "UTF-8"); + break; + } + if (cp >= 1250 && cp <= 1257) + { + char *c = strchr (locale, '@'); + if (c && !strcmp (c + 1, "euro")) + strcpy (charset, "ISO-8859-15"); + } +} |