aboutsummaryrefslogtreecommitdiff
path: root/winsup/cygwin/nlsfuncs.cc
diff options
context:
space:
mode:
authorCorinna Vinschen <corinna@vinschen.de>2010-01-22 22:31:31 +0000
committerCorinna Vinschen <corinna@vinschen.de>2010-01-22 22:31:31 +0000
commit326fb376dda24e2c3f445863791eb6073d91d69b (patch)
tree6f272a713b5deab3bead327e258592b887f3d528 /winsup/cygwin/nlsfuncs.cc
parente894eef9f5bfa989029423afe729da554df8d17e (diff)
downloadnewlib-326fb376dda24e2c3f445863791eb6073d91d69b.zip
newlib-326fb376dda24e2c3f445863791eb6073d91d69b.tar.gz
newlib-326fb376dda24e2c3f445863791eb6073d91d69b.tar.bz2
* Makefile.in (DLL_OFILES): Add nlsfunc.o and strfmon.o.
* autoload.cc (LocaleNameToLCID): Define. * cygwin.din (strfmon): Export. * nlsfuncs.cc: New file. Define a lot of internal functions called from setlocale. (wcscoll): Implement locale-aware here, using CompareStringW function. (strcoll): Ditto. (wcsxfrm): Implement locale-aware here, using LCMapStringW function. (strxfrm): Ditto. (__set_charset_from_locale): Replace __set_charset_from_codepage. Return Linux-compatible charset. * strfuncs.cc (__set_charset_from_codepage): Remove. * wchar.h (__set_charset_from_codepage): Drop definition. * wincap.h (wincaps::has_localenames): New element. * wincap.cc: Implement above element throughout. * libc/strfmon.c: New file. * libc/strptime.cc: Remove locale constant strings in favor of access to locale-specific data. (strptime): Point _CurrentTimeLocale to locale-specific data. Throughout use correct locale-specific format fields for all locale-specific formats. * include/monetary.h: New file. * include/cygwin/version.h (CYGWIN_VERSION_API_MINOR): Bump.
Diffstat (limited to 'winsup/cygwin/nlsfuncs.cc')
-rw-r--r--winsup/cygwin/nlsfuncs.cc764
1 files changed, 764 insertions, 0 deletions
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
new file mode 100644
index 0000000..073aa13
--- /dev/null
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -0,0 +1,764 @@
+/* nlsfuncs.cc: NLS helper functions
+
+ Copyright 2010 Red Hat, Inc.
+
+This file is part of Cygwin.
+
+This software is a copyrighted work licensed under the terms of the
+Cygwin license. Please consult the file "CYGWIN_LICENSE" for
+details. */
+
+#include "winsup.h"
+#include <stdlib.h>
+#include <winnls.h>
+#include <wchar.h>
+#include "path.h"
+#include "fhandler.h"
+#include "dtable.h"
+#include "cygheap.h"
+#include "tls_pbuf.h"
+/* Internal headers from newlib */
+#include "../locale/timelocal.h"
+#include "../locale/lnumeric.h"
+#include "../locale/lmonetary.h"
+
+/* Buffers holding the strings of the currently loaded LC_TIME, LC_NUMERIC
+ and LC_MONETARY data. Each one is replaced, and the previous one freed,
+ by the matching __set_lc_*_from_win function below. */
+static char *lc_time_buf;
+static char *lc_numeric_buf;
+static char *lc_monetary_buf;
+
+/* Expands to two arguments: the address of the write pointer and the number
+ of bytes left in the buffer of category x. Requires local variables
+ called lc_<x>_ptr and lc_<x>_end in the function using it. */
+#define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
+
+/* Shortcuts for __getlocaleinfo and __eval_datetimefmt. Both pick up lcid,
+ f_wctomb and charset from the scope of the calling function;
+ eval_datetimefmt additionally uses lc_time_ptr and lc_time_end. */
+#define getlocaleinfo(category,type) \
+ __getlocaleinfo(lcid,(type),_LC(category),f_wctomb,charset)
+#define eval_datetimefmt(type,force) \
+ __eval_datetimefmt(lcid,(type),(force),&lc_time_ptr,\
+ lc_time_end-lc_time_ptr,f_wctomb, charset)
+
+/* Vista and later. Not defined in w32api yet. */
+extern "C" {
+WINBASEAPI LCID WINAPI LocaleNameToLCID (LPCWSTR, DWORD);
+};
+
+/* One-entry cache used by __get_lcid_from_locale: the locale name of the
+ most recent lookup and the value returned for it. */
+static char last_locale[ENCODING_LEN + 1];
+static LCID last_lcid;
+
+/* Fetch LCID from POSIX locale specifier.
+
+   NAME has the form language[_TERRITORY][.charset][@modifier]; charset and
+   modifier are ignored here.  The most recent lookup is cached in
+   last_locale/last_lcid, so asking for the same locale again is cheap.
+
+   Return values:
+
+     -1: Invalid locale
+      0: C or POSIX
+     >0: LCID
+*/
+static LCID
+__get_lcid_from_locale (const char *name)
+{
+  char locale[ENCODING_LEN + 1];
+  char *c;
+  LCID lcid;
+
+  /* Same locale as in the previous call?  Reuse the cached result. */
+  if (!strcmp (name, last_locale))
+    {
+      debug_printf ("LCID=0x%04x", last_lcid);
+      return last_lcid;
+    }
+  stpcpy (last_locale, name);
+  stpcpy (locale, name);
+  /* Drop charset and modifier */
+  c = strchr (locale, '.');
+  if (!c)
+    c = strchr (locale, '@');
+  if (c)
+    *c = '\0';
+  /* "POSIX" already converted to "C" in loadlocale.  The cached LCID has
+     to be reset as well: last_locale has just been overwritten with NAME,
+     so leaving last_lcid alone would make the next lookup of the same
+     name return the LCID of the locale requested before. */
+  if (!strcmp (locale, "C"))
+    return last_lcid = 0;
+  /* Convert to form understood by LocaleNameToLCID */
+  c = strchr (locale, '_');
+  if (c)
+    *c = '-';
+  if (wincap.has_localenames ())
+    {
+      wchar_t wlocale[ENCODING_LEN + 1];
+      mbstowcs (wlocale, locale, ENCODING_LEN + 1);
+      lcid = LocaleNameToLCID (wlocale, 0);
+      /* LocaleNameToLCID returns 0 for unknown names; map that to -1. */
+      last_lcid = lcid ?: (LCID) -1;
+      debug_printf ("LCID=0x%04x", last_lcid);
+      return last_lcid;
+    }
+  /* Pre-Vista we have to loop through the LCID values and see if they
+     match language and TERRITORY. */
+  if (c)
+    *c++ = '\0';
+  /* locale now points to the language, c points to the TERRITORY */
+  const char *language = locale;
+  const char *territory = c;
+  LCID lang, sublang;
+  char iso[10];
+
+  /* In theory the lang part takes 10 bits (0x3ff), but up to Windows 2003 R2
+     the highest lang value is 0x81. */
+  for (lang = 1; lang <= 0x81; ++lang)
+    if (GetLocaleInfo (lang, LOCALE_SISO639LANGNAME, iso, 10)
+        && !strcmp (language, iso))
+      break;
+  if (lang > 0x81)
+    lcid = 0;
+  else if (!territory)
+    lcid = lang;
+  else
+    {
+      /* In theory the sublang part takes 6 bits (0x3f), but up to
+         Windows 2003 R2 the highest sublang value is 0x14. */
+      for (sublang = 1; sublang <= 0x14; ++sublang)
+        {
+          lcid = (sublang << 10) | lang;
+          if (GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso, 10)
+              && !strcmp (territory, iso))
+            break;
+        }
+      if (sublang > 0x14)
+        lcid = 0;
+    }
+  /* No match found is reported as -1, not as 0 (which means "C"). */
+  last_lcid = lcid ?: (LCID) -1;
+  debug_printf ("LCID=0x%04x", last_lcid);
+  return last_lcid;
+}
+
+/* Convert the wide char string pwcs into a multibyte string in the given
+ charset, using the conversion function f_wctomb.
+
+ If s is not NULL, at most n bytes are stored in s. This mode never
+ returns -1; wide chars which can't be converted are just skipped. The
+ return value is the number of bytes stored, not counting the terminating
+ NUL, or n if the buffer was filled up before the terminating NUL was
+ reached (in which case the result is not NUL-terminated).
+
+ If s is NULL, nothing is stored and n is ignored. The return value is the
+ number of bytes the converted string would need, not counting the NUL, or
+ -1 if pwcs contains a wide char which can't be converted. This mode is
+ used to recognize invalid strings in the used charset. */
+static size_t
+lc_wcstombs (wctomb_p f_wctomb, const char *charset,
+ char *s, const wchar_t *pwcs, size_t n)
+{
+ char *ptr = s;
+ size_t max = n;
+ /* Receives the multibyte sequence of a single wide char. */
+ char buf[8];
+ size_t i, bytes, num_to_copy;
+ mbstate_t state;
+
+ memset (&state, 0, sizeof state);
+ if (s == NULL)
+ {
+ /* Dry run: only add up the lengths, fail on the first bad char. */
+ size_t num_bytes = 0;
+ while (*pwcs != 0)
+ {
+ bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state);
+ if (bytes == (size_t) -1)
+ return (size_t) -1;
+ num_bytes += bytes;
+ }
+ return num_bytes;
+ }
+ while (n > 0)
+ {
+ bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state);
+ if (bytes == (size_t) -1)
+ {
+ /* Unconvertible wide char: reset the conversion state and skip it. */
+ memset (&state, 0, sizeof state);
+ ++pwcs;
+ continue;
+ }
+ /* Store as many bytes of the sequence as still fit into s. */
+ num_to_copy = (n > bytes ? bytes : n);
+ for (i = 0; i < num_to_copy; ++i)
+ *ptr++ = buf[i];
+
+ /* Terminator converted: return the number of bytes stored, minus one
+ for the NUL if its sequence fit completely (n >= bytes). */
+ if (*pwcs == 0x00)
+ return ptr - s - (n >= bytes);
+ ++pwcs;
+ n -= num_to_copy;
+ }
+ /* Buffer exhausted before the terminator was reached. */
+ return max;
+}
+
+/* Convert the multibyte string S, encoded in CHARSET, into wide chars using
+   the conversion function F_MBTOWC.  At most N wide chars are stored in
+   PWCS.  If PWCS is NULL, nothing is stored, N is ignored and the whole
+   string up to the terminating NUL is just counted.
+
+   Never returns -1.  A byte which doesn't start a valid sequence is
+   consumed on its own and turned into a replacement wide char (a space).
+   Returns the number of wide chars converted, not counting the NUL. */
+static size_t
+lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset,
+             wchar_t *pwcs, const char *s, size_t n)
+{
+  mbstate_t mbs;
+  size_t converted = 0;
+  char *src = (char *) s;
+
+  memset (&mbs, 0, sizeof mbs);
+  /* When only counting, n is never decremented below, so any non-zero
+     value keeps the loop going until the NUL is found. */
+  if (pwcs == NULL)
+    n = 1;
+  for (; n > 0; ++converted)
+    {
+      size_t len = f_mbtowc (_REENT, pwcs, src, MB_CUR_MAX, charset, &mbs);
+
+      /* End of string. */
+      if (len == 0)
+        break;
+      if (len == (size_t) -1)
+        {
+          /* Invalid sequence: restart the conversion state, eat one byte
+             and emit the replacement char. */
+          mbs.__count = 0;
+          len = 1;
+          if (pwcs != NULL)
+            *pwcs = L' ';
+        }
+      src += len;
+      if (pwcs != NULL)
+        {
+          ++pwcs;
+          --n;
+        }
+    }
+  return converted;
+}
+
+/* Fetch the locale information TYPE of locale LCID from Windows, convert
+   it into a multibyte string in CHARSET using F_WCTOMB, and store it,
+   NUL-terminated, at *PTR.  SIZE is the number of bytes available at *PTR.
+
+   On return *PTR has been advanced to right behind the terminating NUL.
+   The return value points to the start of the stored string.
+
+   If Windows can't deliver the information, an empty string is stored.
+   If the converted string doesn't fit into SIZE bytes, it is cut off so
+   that the result is still NUL-terminated and *PTR stays inside the
+   buffer (the cut may fall inside a multibyte character). */
+static char *
+__getlocaleinfo (LCID lcid, LCTYPE type, char **ptr, size_t size,
+                 wctomb_p f_wctomb, const char *charset)
+{
+  static char empty_str[1] = "";
+  wchar_t wbuf[80];
+  size_t num;
+  char *ret = *ptr;
+
+  /* No room for even a NUL: don't touch the buffer at all. */
+  if (!size)
+    return empty_str;
+  /* wbuf is left untouched on failure, so make it a valid empty string
+     rather than converting whatever happens to be on the stack. */
+  if (!GetLocaleInfoW (lcid, type, wbuf, 80))
+    wbuf[0] = L'\0';
+  num = lc_wcstombs (f_wctomb, charset, ret, wbuf, size);
+  /* lc_wcstombs returns SIZE if it ran out of space before storing the
+     terminating NUL.  Terminate by hand in that case. */
+  if (num >= size)
+    {
+      num = size - 1;
+      ret[num] = '\0';
+    }
+  *ptr += num + 1;
+  return ret;
+}
+
+/* Fetch the locale information TYPE of locale LCID as a number rather than
+   as a string.  Returns 0 if the information can't be fetched. */
+static UINT
+getlocaleint (LCID lcid, LCTYPE type)
+{
+  UINT number;
+
+  if (!GetLocaleInfoW (lcid, type | LOCALE_RETURN_NUMBER, (PWCHAR) &number,
+                       sizeof number))
+    return 0;
+  return number;
+}
+
+/* Fetch the date or time format picture TYPE of locale LCID from Windows
+   and translate it into a strftime-style format string stored at *PTR.
+   On return *PTR points right behind the terminating NUL.  Returns a
+   pointer to the start of the stored string.  If Windows can't deliver the
+   format, an empty string is stored.
+
+   Translation of the Windows picture characters:
+
+     d, dd, ddd, dddd      %e, %d, %a, %A
+     M, MM, MMM, MMMM      %m, %m, %b, %B
+     y, yy, yyy, yyyy      %y, %y, %y, %Y
+     h, hh                 %l, %I
+     H, HH                 %k, %H
+     m, s, t (or doubled)  %M, %S, %p
+     g                     dropped
+     '...'                 copied literally
+     TAB, NL, %            prefixed with %
+
+   Everything else is converted to CHARSET via F_WCTOMB and copied.
+
+   If FORCE is non-zero, full day and month names are replaced by the
+   abbreviated ones, and hours are always expressed in 12 hour format.
+
+   NOTE(review): SIZE is not used, so the output is not bounds-checked
+   against the space left in the caller's buffer. */
+static char *
+__eval_datetimefmt (LCID lcid, LCTYPE type, int force, char **ptr,
+                    size_t size, wctomb_p f_wctomb, const char *charset)
+{
+  wchar_t buf[80];
+  wchar_t fc;
+  size_t num;
+  mbstate_t mb;
+  size_t idx;
+  /* Conversion characters indexed by (number of repetitions - 1). */
+  const char *day_str = "edaA";
+  const char *mon_str = "mmbB";
+  const char *year_str = "yyyY";
+  const char *hour12_str = "lI";
+  const char *hour24_str = "kH";
+  const char *t_str;
+  char *ret = *ptr;
+  char *p = *ptr;
+
+  /* buf is left untouched on failure.  Treat that as an empty picture
+     instead of parsing uninitialized stack content. */
+  if (!GetLocaleInfoW (lcid, type, buf, 80))
+    buf[0] = L'\0';
+  memset (&mb, 0, sizeof mb);
+  for (wchar_t *fmt = buf; *fmt; ++fmt)
+    switch (fc = *fmt)
+      {
+      case L'\'':
+        if (fmt[1] == L'\'')
+          *p++ = '\'';
+        else
+          /* Quoted text: copy up to the closing quote or end of string. */
+          while (fmt[1] && *++fmt != L'\'')
+            {
+              num = f_wctomb (_REENT, p, *fmt, charset, &mb);
+              if (num == (size_t) -1)
+                memset (&mb, 0, sizeof mb);
+              else
+                p += num;
+            }
+        break;
+      case L'd':
+      case L'M':
+      case L'y':
+        t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str);
+        /* FORCE has no meaning for years. */
+        if (fc == L'y')
+          force = 0;
+        /* Count the repetitions and skip them. */
+        for (idx = 1; fmt[1] == fc; ++idx, ++fmt);
+        if (--idx > 3)
+          idx = 3;
+        /* Abbreviate full names if requested. */
+        if (force && idx == 3)
+          idx = 2;
+        *p++ = '%';
+        *p++ = t_str[idx];
+        break;
+      case L'g':
+        /* Era string; no counterpart, just drop it. */
+        break;
+      case L'h':
+      case L'H':
+        t_str = (fc == L'h' || force ? hour12_str : hour24_str);
+        idx = 0;
+        if (fmt[1] == fc)
+          {
+            ++fmt;
+            idx = 1;
+          }
+        *p++ = '%';
+        *p++ = t_str[idx];
+        break;
+      case L'm':
+      case L's':
+      case L't':
+        if (fmt[1] == fc)
+          ++fmt;
+        *p++ = '%';
+        *p++ = (fc == L'm' ? 'M' : fc == L's' ? 'S' : 'p');
+        break;
+      case L'\t':
+      case L'\n':
+      case L'%':
+        *p++ = '%';
+        *p++ = (char) fc;
+        break;
+      default:
+        num = f_wctomb (_REENT, p, *fmt, charset, &mb);
+        if (num == (size_t) -1)
+          memset (&mb, 0, sizeof mb);
+        else
+          p += num;
+        break;
+      }
+  *p++ = '\0';
+  *ptr = p;
+  return ret;
+}
+
+/* Convert Windows grouping format into POSIX grouping format.
+
+   Fetches the grouping string TYPE of locale LCID, a list of digits
+   separated by semicolons like "3;2;0", and stores the POSIX form at
+   *LC_PTR: one byte per group holding the group size.  A 0 in the Windows
+   string ends the list and means that the last group size repeats.  If
+   there's no 0, CHAR_MAX is appended, which stands for "no further
+   grouping".  The result is NUL-terminated and *LC_PTR is advanced to right
+   behind the NUL.  Returns a pointer to the start of the stored string.
+
+   If Windows can't deliver the grouping string, the result consists of
+   CHAR_MAX only. */
+static char *
+conv_grouping (LCID lcid, LCTYPE type, char **lc_ptr)
+{
+  char buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
+  bool repeat = false;
+  char *ptr = *lc_ptr;
+  char *ret = ptr;
+
+  /* buf is left untouched on failure; don't parse uninitialized data. */
+  if (!GetLocaleInfoA (lcid, type, buf, 10))
+    buf[0] = '\0';
+  for (char *c = buf; *c; ++c)
+    {
+      /* Skip separators. */
+      if (*c < '0' || *c > '9')
+        continue;
+      char val = *c - '0';
+      if (!val)
+        {
+          repeat = true;
+          break;
+        }
+      *ptr++ = val;
+    }
+  if (!repeat)
+    *ptr++ = CHAR_MAX;
+  *ptr++ = '\0';
+  *lc_ptr = ptr;
+  return ret;
+}
+
+/* Called from newlib's setlocale() via __time_load_locale() if category
+   is LC_TIME.  Returns LC_TIME values fetched from Windows locale data
+   in the structure pointed to by _time_locale.  This is subsequently
+   accessed by functions like nl_langinfo, strftime, strptime.
+
+   NAME is the POSIX locale specifier, F_WCTOMB and CHARSET describe the
+   charset the strings are converted to.
+
+   Return values:
+
+     -1: Invalid locale or out of memory; _time_locale is untouched.
+      0: C or POSIX locale; _time_locale is untouched.
+      1: _time_locale has been filled.  All its strings live in a newly
+         allocated buffer which replaces lc_time_buf. */
+extern "C" int
+__set_lc_time_from_win (const char *name, struct lc_time_T *_time_locale,
+                        wctomb_p f_wctomb, const char *charset)
+{
+  LCID lcid = __get_lcid_from_locale (name);
+  if (!lcid || lcid == (LCID) -1)
+    return lcid;
+
+  char *new_lc_time_buf = (char *) malloc (4096);
+  if (!new_lc_time_buf)
+    return -1;
+  /* lc_time_ptr and lc_time_end are used implicitly by the getlocaleinfo
+     and eval_datetimefmt macros. */
+  const char *lc_time_end = new_lc_time_buf + 4096;
+  char *lc_time_ptr = new_lc_time_buf;
+
+  /* mon */
+  for (int i = 0; i < 12; ++i)
+    _time_locale->mon[i] = getlocaleinfo (time, LOCALE_SABBREVMONTHNAME1 + i);
+  /* month and alt_month */
+  for (int i = 0; i < 12; ++i)
+    _time_locale->month[i] = _time_locale->alt_month[i]
+      = getlocaleinfo (time, LOCALE_SMONTHNAME1 + i);
+  /* wday.  Windows counts from Monday, POSIX from Sunday. */
+  _time_locale->wday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7);
+  for (int i = 0; i < 6; ++i)
+    _time_locale->wday[i + 1] = getlocaleinfo (time,
+                                               LOCALE_SABBREVDAYNAME1 + i);
+  /* weekday */
+  _time_locale->weekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7);
+  for (int i = 0; i < 6; ++i)
+    _time_locale->weekday[i + 1] = getlocaleinfo (time, LOCALE_SDAYNAME1 + i);
+  /* X_fmt */
+  _time_locale->X_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, 0);
+  /* x_fmt */
+  _time_locale->x_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, 0);
+  /* c_fmt: date, a space overwriting the NUL, then the time. */
+  _time_locale->c_fmt = eval_datetimefmt (LOCALE_SLONGDATE, 1);
+  --lc_time_ptr;
+  *lc_time_ptr++ = ' ';
+  eval_datetimefmt (LOCALE_STIMEFORMAT, 0);
+  /* AM/PM */
+  _time_locale->am_pm[0] = getlocaleinfo (time, LOCALE_S1159);
+  _time_locale->am_pm[1] = getlocaleinfo (time, LOCALE_S2359);
+  /* date_fmt: like c_fmt with " %Z" appended. */
+  _time_locale->date_fmt = eval_datetimefmt (LOCALE_SLONGDATE, 1);
+  --lc_time_ptr;
+  *lc_time_ptr++ = ' ';
+  eval_datetimefmt (LOCALE_STIMEFORMAT, 0);
+  --lc_time_ptr;
+  lc_time_ptr = stpcpy (lc_time_ptr, " %Z") + 1;
+  /* md */
+  {
+    wchar_t buf[80];
+    /* Fall back to month-day order if the value can't be fetched. */
+    if (!GetLocaleInfoW (lcid, LOCALE_IDATE, buf, 80))
+      buf[0] = L'\0';
+    /* The string has to be hooked up to the structure, otherwise it's
+       just dead weight in the buffer. */
+    _time_locale->md_order = lc_time_ptr;
+    lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
+  }
+  /* ampm_fmt */
+  _time_locale->ampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, 1);
+
+  /* The buffer is deliberately not shrunk with realloc.  realloc is free
+     to move the block, which would leave all the pointers just stored in
+     _time_locale dangling. */
+  free (lc_time_buf);
+  lc_time_buf = new_lc_time_buf;
+  return 1;
+}
+
+/* Called from newlib's setlocale() via __numeric_load_locale() if category
+   is LC_NUMERIC.  Returns LC_NUMERIC values fetched from Windows locale data
+   in the structure pointed to by _numeric_locale.  This is subsequently
+   accessed by functions like nl_langinfo, localeconv, printf, etc.
+
+   NAME is the POSIX locale specifier, F_WCTOMB and CHARSET describe the
+   charset the strings are converted to.
+
+   Return values:
+
+     -1: Invalid locale or out of memory; _numeric_locale is untouched.
+      0: C or POSIX locale; _numeric_locale is untouched.
+      1: _numeric_locale has been filled.  All its strings live in a newly
+         allocated buffer which replaces lc_numeric_buf. */
+extern "C" int
+__set_lc_numeric_from_win (const char *name,
+                           struct lc_numeric_T *_numeric_locale,
+                           wctomb_p f_wctomb, const char *charset)
+{
+  LCID lcid = __get_lcid_from_locale (name);
+  if (!lcid || lcid == (LCID) -1)
+    return lcid;
+
+  char *new_lc_numeric_buf = (char *) malloc (48);
+  if (!new_lc_numeric_buf)
+    return -1;
+  /* lc_numeric_ptr and lc_numeric_end are used implicitly by the
+     getlocaleinfo macro. */
+  const char *lc_numeric_end = new_lc_numeric_buf + 48;
+  char *lc_numeric_ptr = new_lc_numeric_buf;
+
+  /* decimal_point */
+  _numeric_locale->decimal_point = getlocaleinfo (numeric,
+                                                  LOCALE_SDECIMAL);
+  /* thousands_sep */
+  _numeric_locale->thousands_sep = getlocaleinfo (numeric,
+                                                  LOCALE_STHOUSAND);
+  /* grouping */
+  _numeric_locale->grouping = conv_grouping (lcid, LOCALE_SGROUPING,
+                                             &lc_numeric_ptr);
+
+  /* The buffer is deliberately not shrunk with realloc.  realloc is free
+     to move the block, which would leave the pointers just stored in
+     _numeric_locale dangling. */
+  free (lc_numeric_buf);
+  lc_numeric_buf = new_lc_numeric_buf;
+  return 1;
+}
+
+/* Called from newlib's setlocale() via __monetary_load_locale() if category
+   is LC_MONETARY.  Returns LC_MONETARY values fetched from Windows locale data
+   in the structure pointed to by _monetary_locale.  This is subsequently
+   accessed by functions like nl_langinfo, localeconv, printf, etc.
+
+   NAME is the POSIX locale specifier, F_WCTOMB and CHARSET describe the
+   charset the strings are converted to.
+
+   Return values:
+
+     -1: Invalid locale or out of memory; _monetary_locale is untouched.
+      0: C or POSIX locale; _monetary_locale is untouched.
+      1: _monetary_locale has been filled.  All its strings and values live
+         in a newly allocated buffer which replaces lc_monetary_buf. */
+extern "C" int
+__set_lc_monetary_from_win (const char *name,
+                            struct lc_monetary_T *_monetary_locale,
+                            wctomb_p f_wctomb, const char *charset)
+{
+  LCID lcid = __get_lcid_from_locale (name);
+  if (!lcid || lcid == (LCID) -1)
+    return lcid;
+
+  char *new_lc_monetary_buf = (char *) malloc (256);
+  if (!new_lc_monetary_buf)
+    return -1;
+  /* lc_monetary_ptr and lc_monetary_end are used implicitly by the
+     getlocaleinfo macro. */
+  const char *lc_monetary_end = new_lc_monetary_buf + 256;
+  char *lc_monetary_ptr = new_lc_monetary_buf;
+
+  /* int_curr_symbol */
+  _monetary_locale->int_curr_symbol = getlocaleinfo (monetary,
+                                                     LOCALE_SINTLSYMBOL);
+  /* No spacing char means space.  Only look for the spacing char if the
+     three letter symbol is actually there, otherwise index 3 is beyond
+     the end of the string. */
+  if (strlen (_monetary_locale->int_curr_symbol) == 3)
+    {
+      lc_monetary_ptr[-1] = ' ';
+      *lc_monetary_ptr++ = '\0';
+    }
+  /* currency_symbol */
+  {
+    /* As on Linux: If the currency_symbol can't be represented in the
+       given charset, use int_curr_symbol. */
+    wchar_t wbuf[14];
+    /* An unavailable symbol is handled as an empty one. */
+    if (!GetLocaleInfoW (lcid, LOCALE_SCURRENCY, wbuf, 14))
+      wbuf[0] = L'\0';
+    if (lc_wcstombs (f_wctomb, charset, NULL, wbuf, 0) == (size_t) -1)
+      {
+        _monetary_locale->currency_symbol = lc_monetary_ptr;
+        lc_monetary_ptr = stpncpy (lc_monetary_ptr,
+                                   _monetary_locale->int_curr_symbol, 3);
+        *lc_monetary_ptr++ = '\0';
+      }
+    else
+      _monetary_locale->currency_symbol = getlocaleinfo (monetary,
+                                                         LOCALE_SCURRENCY);
+  }
+  /* mon_decimal_point */
+  _monetary_locale->mon_decimal_point = getlocaleinfo (monetary,
+                                                       LOCALE_SMONDECIMALSEP);
+  /* mon_thousands_sep */
+  _monetary_locale->mon_thousands_sep = getlocaleinfo (monetary,
+                                                       LOCALE_SMONTHOUSANDSEP);
+  /* mon_grouping */
+  _monetary_locale->mon_grouping = conv_grouping (lcid, LOCALE_SMONGROUPING,
+                                                  &lc_monetary_ptr);
+  /* positive_sign */
+  _monetary_locale->positive_sign = getlocaleinfo (monetary,
+                                                   LOCALE_SPOSITIVESIGN);
+  /* negative_sign */
+  _monetary_locale->negative_sign = getlocaleinfo (monetary,
+                                                   LOCALE_SNEGATIVESIGN);
+  /* The following values are single bytes, not strings.  Each one is
+     stored in the buffer and the structure points to it. */
+  /* int_frac_digits */
+  *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IINTLCURRDIGITS);
+  _monetary_locale->int_frac_digits = lc_monetary_ptr++;
+  /* frac_digits */
+  *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_ICURRDIGITS);
+  _monetary_locale->frac_digits = lc_monetary_ptr++;
+  /* p_cs_precedes */
+  *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSYMPRECEDES);
+  _monetary_locale->p_cs_precedes = lc_monetary_ptr++;
+  /* p_sep_by_space */
+  *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSEPBYSPACE);
+  _monetary_locale->p_sep_by_space = lc_monetary_ptr++;
+  /* n_cs_precedes */
+  *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSYMPRECEDES);
+  _monetary_locale->n_cs_precedes = lc_monetary_ptr++;
+  /* n_sep_by_space */
+  *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSEPBYSPACE);
+  _monetary_locale->n_sep_by_space = lc_monetary_ptr++;
+  /* p_sign_posn */
+  *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSIGNPOSN);
+  _monetary_locale->p_sign_posn = lc_monetary_ptr++;
+  /* n_sign_posn */
+  *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSIGNPOSN);
+  _monetary_locale->n_sign_posn = lc_monetary_ptr++;
+
+  /* The buffer is deliberately not shrunk with realloc.  realloc is free
+     to move the block, which would leave the pointers just stored in
+     _monetary_locale dangling. */
+  free (lc_monetary_buf);
+  lc_monetary_buf = new_lc_monetary_buf;
+  return 1;
+}
+
+/* Current LC_COLLATE settings, stored by __collate_load_locale and used by
+ strcoll, strxfrm, wcscoll and wcsxfrm. As long as collate_lcid is 0,
+ these functions don't call into Windows but use plain string comparison
+ and copying. */
+static LCID collate_lcid = 0;
+static mbtowc_p collate_mbtowc = __ascii_mbtowc;
+static char collate_charset[ENCODING_LEN + 1] = "ASCII";
+
+/* Called from newlib's setlocale() if category is LC_COLLATE.  Remembers
+   the LCID belonging to NAME together with the multibyte-to-wide-char
+   conversion function F_MBTOWC and the CHARSET name for later use by
+   strcoll, strxfrm, wcscoll and wcsxfrm.
+
+   Returns 0 on success, -1 if NAME is not a valid locale.  In the latter
+   case the stored settings are left alone. */
+extern "C" int
+__collate_load_locale (const char *name, mbtowc_p f_mbtowc, const char *charset)
+{
+  const LCID new_lcid = __get_lcid_from_locale (name);
+
+  if (new_lcid != (LCID) -1)
+    {
+      collate_lcid = new_lcid;
+      collate_mbtowc = f_mbtowc;
+      stpcpy (collate_charset, charset);
+      return 0;
+    }
+  return -1;
+}
+
+/* Locale-specific string comparison and transformation is left to the
+   respective Windows functions.  That way no files with collation
+   information have to be maintained. */
+
+/* Compare the wide char strings WS1 and WS2 according to the current
+   LC_COLLATE locale.  Returns a value less than, equal to, or greater than
+   zero.  In the "C" locale this is the same as wcscmp.  If the Windows
+   comparison fails, errno is set to EINVAL. */
+extern "C" int
+wcscoll (const wchar_t *ws1, const wchar_t *ws2)
+{
+  if (collate_lcid == 0)
+    return wcscmp (ws1, ws2);
+
+  const int cmp = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
+  if (cmp == 0)
+    set_errno (EINVAL);
+  /* CompareStringW results are centered around CSTR_EQUAL. */
+  return cmp - CSTR_EQUAL;
+}
+
+/* Compare the multibyte strings S1 and S2 according to the current
+   LC_COLLATE locale.  Returns a value less than, equal to, or greater than
+   zero.  In the "C" locale this is the same as strcmp.
+
+   Both strings are converted to wide char strings using the LC_COLLATE
+   charset first.  Strings not fitting into a temporary path buffer are
+   converted into malloced memory.  If that allocation fails, the strings
+   are compared bytewise instead.  If the Windows comparison fails, errno
+   is set to EINVAL. */
+extern "C" int
+strcoll (const char *s1, const char *s2)
+{
+  size_t n1, n2;
+  wchar_t *ws1, *ws2;
+  tmp_pathbuf tp;
+  int ret;
+
+  if (!collate_lcid)
+    return strcmp (s1, s2);
+  /* The ANSI version of CompareString uses the default charset of the lcid,
+     so we must use the Unicode version. */
+  n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1;
+  n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
+  ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
+                          : tp.w_get ());
+  ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
+                          : tp.w_get ());
+  if (ws1 && ws2)
+    {
+      lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1);
+      lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
+      ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
+      if (!ret)
+        set_errno (EINVAL);
+      ret -= CSTR_EQUAL;
+    }
+  else
+    /* Out of memory, errno has been set by malloc.  strcoll has no way to
+       report failure, so at least don't crash and return something
+       sensible. */
+    ret = strcmp (s1, s2);
+  if (n1 > NT_MAX_PATH)
+    free (ws1);
+  if (n2 > NT_MAX_PATH)
+    free (ws2);
+  return ret;
+}
+
+/* Transform the wide char string WS2 so that comparing two transformed
+   strings with wcscmp gives the same result as comparing the original
+   strings with wcscoll.  At most WSN wide chars, including the terminating
+   NUL, are stored in WS1.
+
+   Returns the length of the transformed string in wide chars, not counting
+   the NUL.  If the return value is WSN or more, the buffer was too small
+   and the content of WS1 is indeterminate.  Calling with a WSN of 0 is a
+   valid way to ask for the required length.  If the transformation fails
+   for another reason, errno is set to EINVAL and WSN is returned.
+
+   In the "C" locale the string is just copied. */
+extern "C" size_t
+wcsxfrm (wchar_t *ws1, const wchar_t *ws2, size_t wsn)
+{
+  size_t ret;
+
+  if (!collate_lcid)
+    return wcslcpy (ws1, ws2, wsn);
+  /* With LCMAP_SORTKEY the result is a NUL-terminated byte string and all
+     sizes are given in bytes.  With a size of 0 only the required size is
+     returned. */
+  ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY,
+                      ws2, -1, ws1, wsn * sizeof (wchar_t));
+  if (!ret)
+    {
+      if (GetLastError () == ERROR_INSUFFICIENT_BUFFER)
+        /* The caller needs to know how much room is required. */
+        ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, NULL, 0);
+      if (!ret)
+        {
+          set_errno (EINVAL);
+          return wsn;
+        }
+      return ret / sizeof (wchar_t);
+    }
+  /* ret counts the bytes including the terminating NUL byte.  The number
+     of wide chars needed to hold the key bytes is therefore ret / 2; in
+     case of an odd number of key bytes the last wide char is completed by
+     the NUL byte. */
+  ret /= sizeof (wchar_t);
+  if (ret >= wsn)
+    return ret;
+  /* Sort keys are meant to be compared bytewise.  Packed into little
+     endian wide chars, the second byte of each pair would weigh more than
+     the first when compared by wcscmp, so swap the bytes of every unit. */
+  for (size_t idx = 0; idx < ret; ++idx)
+    ws1[idx] = (wchar_t) (((ws1[idx] & 0xff) << 8) | ((ws1[idx] >> 8) & 0xff));
+  /* The key is only terminated by a single NUL byte, so terminate it with
+     a full wide char NUL. */
+  ws1[ret] = L'\0';
+  return ret;
+}
+
+/* Transform the multibyte string S2 so that comparing two transformed
+   strings with strcmp gives the same result as comparing the original
+   strings with strcoll.  At most SN bytes, including the terminating NUL,
+   are stored in S1.
+
+   Returns the length of the transformed string, not counting the NUL.  If
+   the return value is SN or more, the buffer was too small and the content
+   of S1 is indeterminate.  Calling with an SN of 0 is a valid way to ask
+   for the required length.  If the transformation fails for another reason
+   (errno is EINVAL) or memory is exhausted, SN is returned.
+
+   In the "C" locale the string is just copied. */
+extern "C" size_t
+strxfrm (char *s1, const char *s2, size_t sn)
+{
+  size_t ret;
+  size_t n2;
+  wchar_t *ws2;
+  tmp_pathbuf tp;
+
+  if (!collate_lcid)
+    return strlcpy (s1, s2, sn);
+  /* The ANSI version of LCMapString uses the default charset of the lcid,
+     so we must use the Unicode version. */
+  n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
+  ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
+                          : tp.w_get ());
+  /* Out of memory, errno has been set by malloc. */
+  if (!ws2)
+    return sn;
+  lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
+  /* The sort key is a NUL-terminated byte string. */
+  ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, (PWCHAR) s1, sn);
+  if (!ret && GetLastError () == ERROR_INSUFFICIENT_BUFFER)
+    /* The caller needs to know how much room is required. */
+    ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, NULL, 0);
+  if (n2 > NT_MAX_PATH)
+    free (ws2);
+  if (ret == 0)
+    {
+      set_errno (EINVAL);
+      return sn;
+    }
+  /* LCMapStringW returns byte count including the terminating NUL character.
+     strxfrm is supposed to return length excluding the NUL. */
+  return ret - 1;
+}
+
+/* Determine the character set to use for LOCALE if the POSIX compatible
+   locale specifier doesn't name one explicitly.  The default ANSI codepage
+   of the matching Windows locale is fetched and translated into a setlocale
+   compatible charset name, which is stored in CHARSET.  The "C" locale and
+   invalid locales result in "ASCII".  Called from newlib's setlocale(). */
+extern "C" void
+__set_charset_from_locale (const char *locale, char *charset)
+{
+  /* Codepage to de-facto standard charset transition.  Codepage 874 is
+     handled separately, every other codepage not listed here results in
+     UTF-8. */
+  static const struct
+  {
+    UINT codepage;
+    const char *charset_name;
+  } cp_to_charset[] =
+  {
+    {  932, "EUCJP" },
+    {  936, "GBK" },
+    {  949, "EUCKR" },
+    {  950, "BIG5" },
+    { 1250, "ISO-8859-2" },
+    { 1251, "ISO-8859-5" },
+    { 1252, "ISO-8859-1" },
+    { 1253, "ISO-8859-7" },
+    { 1254, "ISO-8859-9" },
+    { 1255, "ISO-8859-8" },
+    { 1256, "ISO-8859-6" },
+    { 1257, "ISO-8859-13" }
+  };
+  const LCID lcid = __get_lcid_from_locale (locale);
+  UINT cp;
+
+  /* "C" locale, or invalid locale? */
+  if (lcid == 0 || lcid == (LCID) -1)
+    {
+      __small_sprintf (charset, "ASCII");
+      return;
+    }
+  /* An unknown codepage is treated like codepage 0. */
+  if (!GetLocaleInfoW (lcid,
+                       LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
+                       (PWCHAR) &cp, sizeof cp))
+    cp = 0;
+  if (cp == 874)
+    __small_sprintf (charset, "CP%u", cp);
+  else
+    {
+      const char *name = "UTF-8";
+
+      for (size_t i = 0; i < sizeof cp_to_charset / sizeof cp_to_charset[0];
+           ++i)
+        if (cp_to_charset[i].codepage == cp)
+          {
+            name = cp_to_charset[i].charset_name;
+            break;
+          }
+      strcpy (charset, name);
+    }
+  /* For the codepages mapped to an ISO-8859 charset, the "euro" modifier
+     selects ISO-8859-15 instead. */
+  if (cp >= 1250 && cp <= 1257)
+    {
+      const char *modifier = strchr (locale, '@');
+
+      if (modifier && !strcmp (modifier + 1, "euro"))
+        strcpy (charset, "ISO-8859-15");
+    }
+}