diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2016-07-20 22:05:59 +0200 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2016-08-15 10:56:57 +0200 |
commit | d16a56306d63b4d94412b479a8ea83463a3514ab (patch) | |
tree | b37c2954976f314628637e660e039f1be4034b1c /winsup/cygwin | |
parent | 88208d3735821df0f5a66c5f8781282a7f5bf284 (diff) | |
download | newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.zip newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.tar.gz newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.tar.bz2 |
Consolidate wctomb/mbtowc calls for POSIX-1.2008
- Remove charset parameter from low level __foo_wctomb/__foo_mbtowc calls.
- Instead, create arrays of functions for ISO and Windows codepages which
point to functions that do not need to evaluate the charset string on
each call. Create matching helper functions, i.e., __iso_wctomb,
__iso_mbtowc, __cp_wctomb and __cp_mbtowc are now functions returning
the right function pointer.
- Create __WCTOMB/__MBTOWC macros utilizing per-reent locale and replace
calls to __wctomb/__mbtowc with calls to __WCTOMB/__MBTOWC.
- Drop global __wctomb/__mbtowc vars.
- Utilize the aforementioned changes in Cygwin to get rid of the charset
parameter in other calling functions and simplify the code.
- In Cygwin restrict global cygheap locale info to the job performed
by internal_setlocale. Use UTF-8 instead of ASCII on the fly in
internal conversion functions.
- In Cygwin dll_entry, make sure to initialize a TLS area with a NULL
_REENT->_locale pointer. Add comment to explain why.
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diffstat (limited to 'winsup/cygwin')
-rw-r--r-- | winsup/cygwin/cygheap.cc | 4 | ||||
-rw-r--r-- | winsup/cygwin/cygheap.h | 2 | ||||
-rw-r--r-- | winsup/cygwin/fhandler.h | 2 | ||||
-rw-r--r-- | winsup/cygwin/fhandler_console.cc | 35 | ||||
-rw-r--r-- | winsup/cygwin/init.cc | 9 | ||||
-rw-r--r-- | winsup/cygwin/nlsfuncs.cc | 95 | ||||
-rw-r--r-- | winsup/cygwin/strfuncs.cc | 60 | ||||
-rw-r--r-- | winsup/cygwin/wchar.h | 42 |
8 files changed, 109 insertions, 140 deletions
diff --git a/winsup/cygwin/cygheap.cc b/winsup/cygwin/cygheap.cc index 11f868f..87a5eb9 100644 --- a/winsup/cygwin/cygheap.cc +++ b/winsup/cygwin/cygheap.cc @@ -28,7 +28,7 @@ static mini_cygheap NO_COPY cygheap_dummy = { - {__utf8_mbtowc, __utf8_wctomb} + {__utf8_mbtowc} }; init_cygheap NO_COPY *cygheap = (init_cygheap *) &cygheap_dummy; @@ -245,8 +245,6 @@ cygheap_init () cygheap->bucket_val[b] = sz[b & 1]; /* Default locale settings. */ cygheap->locale.mbtowc = __utf8_mbtowc; - cygheap->locale.wctomb = __utf8_wctomb; - strcpy (cygheap->locale.charset, "UTF-8"); /* Set umask to a sane default. */ cygheap->umask = 022; cygheap->rlim_core = RLIM_INFINITY; diff --git a/winsup/cygwin/cygheap.h b/winsup/cygwin/cygheap.h index e2807e2..abbf9ec 100644 --- a/winsup/cygwin/cygheap.h +++ b/winsup/cygwin/cygheap.h @@ -346,8 +346,6 @@ struct cygheap_debug struct cygheap_locale { mbtowc_p mbtowc; - wctomb_p wctomb; - char charset[ENCODING_LEN + 1]; }; struct user_heap_info diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h index 3321523..c7db8f8 100644 --- a/winsup/cygwin/fhandler.h +++ b/winsup/cygwin/fhandler.h @@ -1355,7 +1355,7 @@ class dev_console inline UINT get_console_cp (); DWORD con_to_str (char *d, int dlen, WCHAR w); - DWORD str_to_con (mbtowc_p, const char *, PWCHAR d, const char *s, DWORD sz); + DWORD str_to_con (mbtowc_p, PWCHAR d, const char *s, DWORD sz); void set_color (HANDLE); void set_default_attr (); int set_cl_x (cltype); diff --git a/winsup/cygwin/fhandler_console.cc b/winsup/cygwin/fhandler_console.cc index 76aff0f..45fe882 100644 --- a/winsup/cygwin/fhandler_console.cc +++ b/winsup/cygwin/fhandler_console.cc @@ -225,10 +225,9 @@ dev_console::get_console_cp () } inline DWORD -dev_console::str_to_con (mbtowc_p f_mbtowc, const char *charset, - PWCHAR d, const char *s, DWORD sz) +dev_console::str_to_con (mbtowc_p f_mbtowc, PWCHAR d, const char *s, DWORD sz) { - return sys_cp_mbstowcs (f_mbtowc, charset, d, CONVERT_LIMIT, s, sz); + return 
sys_cp_mbstowcs (f_mbtowc, d, CONVERT_LIMIT, s, sz); } bool @@ -2002,21 +2001,10 @@ fhandler_console::write_normal (const unsigned char *src, const unsigned char *found = src; size_t ret; mbstate_t ps; - UINT cp = con.get_console_cp (); - const char *charset; mbtowc_p f_mbtowc; - if (cp) - { - /* The alternate charset is always 437, just as in the Linux console. */ - f_mbtowc = __cp_mbtowc; - charset = "CP437"; - } - else - { - f_mbtowc = cygheap->locale.mbtowc; - charset = cygheap->locale.charset; - } + /* The alternate charset is always 437, just as in the Linux console. */ + f_mbtowc = con.get_console_cp () ? __cp_mbtowc (437) : __MBTOWC; /* First check if we have cached lead bytes of a former try to write a truncated multibyte sequence. If so, process it. */ @@ -2027,7 +2015,7 @@ fhandler_console::write_normal (const unsigned char *src, memcpy (trunc_buf.buf + trunc_buf.len, src, cp_len); memset (&ps, 0, sizeof ps); switch (ret = f_mbtowc (_REENT, NULL, (const char *) trunc_buf.buf, - trunc_buf.len + cp_len, charset, &ps)) + trunc_buf.len + cp_len, &ps)) { case -2: /* Still truncated multibyte sequence? Keep in trunc_buf. */ @@ -2052,9 +2040,9 @@ fhandler_console::write_normal (const unsigned char *src, /* Valid multibyte sequence? Process. */ if (nfound) { - buf_len = con.str_to_con (f_mbtowc, charset, write_buf, - (const char *) trunc_buf.buf, - nfound - trunc_buf.buf); + buf_len = con.str_to_con (f_mbtowc, write_buf, + (const char *) trunc_buf.buf, + nfound - trunc_buf.buf); if (!write_console (write_buf, buf_len, done)) { debug_printf ("multibyte sequence write failed, handle %p", get_output_handle ()); @@ -2075,7 +2063,7 @@ fhandler_console::write_normal (const unsigned char *src, && base_chars[*found] == NOR) { switch (ret = f_mbtowc (_REENT, NULL, (const char *) found, - end - found, charset, &ps)) + end - found, &ps)) { case -2: /* Truncated multibyte sequence. Store for next write. 
*/ trunc_buf.len = end - found; @@ -2098,8 +2086,7 @@ do_print: if (found != src) { DWORD len = found - src; - buf_len = con.str_to_con (f_mbtowc, charset, write_buf, - (const char *) src, len); + buf_len = con.str_to_con (f_mbtowc, write_buf, (const char *) src, len); if (!buf_len) { debug_printf ("conversion error, handle %p", @@ -2178,7 +2165,7 @@ do_print: if (found + 1 < end) { ret = __utf8_mbtowc (_REENT, NULL, (const char *) found + 1, - end - found - 1, NULL, &ps); + end - found - 1, &ps); if (ret != (size_t) -1) while (ret-- > 0) { diff --git a/winsup/cygwin/init.cc b/winsup/cygwin/init.cc index 7285e3d..1728105 100644 --- a/winsup/cygwin/init.cc +++ b/winsup/cygwin/init.cc @@ -83,6 +83,15 @@ dll_entry (HANDLE h, DWORD reason, void *static_load) cygwin_hmodule = (HMODULE) h; dynamically_loaded = (static_load == NULL); + /* Starting with adding the POSIX-1.2008 per-thread locale functionality, + we need an initalized _REENT area even for the functions called from + dll_crt0_0. In fact, we only need the _REENT->_locale pointer + initialized to NULL, so subsequent calls to locale-specific functions + will always fall back to __global_locale, rather then crash due to + _REENT->_locale having an arbitrary value. */ + (void) alloca (CYGTLS_PADSIZE); + _REENT->_locale = NULL; + dll_crt0_0 (); _my_oldfunc = TlsAlloc (); dll_finished_loading = true; diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc index 6dde251..2ba9f32 100644 --- a/winsup/cygwin/nlsfuncs.cc +++ b/winsup/cygwin/nlsfuncs.cc @@ -16,8 +16,6 @@ details. */ #include "dtable.h" #include "cygheap.h" #include "tls_pbuf.h" -/* Internal headers from newlib */ -#include "../locale/setlocale.h" #include "lc_msg.h" #include "lc_era.h" @@ -31,8 +29,7 @@ details. 
*/ __eval_datetimefmt(lcid,(type),(flags),&lc_time_ptr,\ lc_time_end-lc_time_ptr) #define charfromwchar(category,in) \ - __charfromwchar (_##category##_locale->in,_LC(category),\ - f_wctomb,charset) + __charfromwchar (_##category##_locale->in,_LC(category),f_wctomb) #define has_modifier(x) ((x)[0] && !strcmp (modifier, (x))) @@ -159,8 +156,7 @@ __get_lcid_from_locale (const char *name) is set, s==NULL returns -1 since then it's used to recognize invalid strings in the used charset. */ static size_t -lc_wcstombs (wctomb_p f_wctomb, const char *charset, - char *s, const wchar_t *pwcs, size_t n, +lc_wcstombs (wctomb_p f_wctomb, char *s, const wchar_t *pwcs, size_t n, bool return_invalid = false) { char *ptr = s; @@ -175,7 +171,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset, size_t num_bytes = 0; while (*pwcs != 0) { - bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state); + bytes = f_wctomb (_REENT, buf, *pwcs++, &state); if (bytes != (size_t) -1) num_bytes += bytes; else if (return_invalid) @@ -185,7 +181,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset, } while (n > 0) { - bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state); + bytes = f_wctomb (_REENT, buf, *pwcs, &state); if (bytes == (size_t) -1) { memset (&state, 0, sizeof state); @@ -207,8 +203,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset, /* Never returns -1. Invalid sequences are translated to replacement wide-chars. 
*/ static size_t -lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset, - wchar_t *pwcs, const char *s, size_t n) +lc_mbstowcs (mbtowc_p f_mbtowc, wchar_t *pwcs, const char *s, size_t n) { size_t ret = 0; char *t = (char *) s; @@ -220,8 +215,7 @@ lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset, n = 1; while (n > 0) { - bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */, - charset, &state); + bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */, &state); if (bytes == (size_t) -1) { state.__count = 0; @@ -294,13 +288,12 @@ __setlocaleinfo (char **ptr, size_t size, wchar_t val) } static char * -__charfromwchar (const wchar_t *in, char **ptr, size_t size, - wctomb_p f_wctomb, const char *charset) +__charfromwchar (const wchar_t *in, char **ptr, size_t size, wctomb_p f_wctomb) { size_t num; char *ret; - num = lc_wcstombs (f_wctomb, charset, ret = *ptr, in, size); + num = lc_wcstombs (f_wctomb, ret = *ptr, in, size); *ptr += num + 1; return ret; } @@ -600,11 +593,11 @@ __set_lc_time_from_win (const char *name, /* Evaluate string length in target charset. Characters invalid in the target charset are simply ignored, as on Linux. 
*/ len = 0; - len += lc_wcstombs (f_wctomb, charset, NULL, era->era, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_fmt, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_t_fmt, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, era->era_t_fmt, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, era->alt_digits, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->era, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->era_d_fmt, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->era_d_t_fmt, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->era_t_fmt, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->alt_digits, 0) + 1; len += (wcslen (era->era) + 1) * sizeof (wchar_t); len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t); len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t); @@ -742,8 +735,7 @@ __set_lc_ctype_from_win (const char *name, lc_ctype_ptr = (char *) woutdig; _ctype_locale->outdigits[i] = lc_ctype_ptr; memset (&state, 0, sizeof state); - lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], charset, - &state); + lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], &state); *lc_ctype_ptr++ = '\0'; } } @@ -885,8 +877,7 @@ __set_lc_monetary_from_win (const char *name, LOCALE_SCURRENCY); /* As on Linux: If the currency_symbol can't be represented in the given charset, use int_curr_symbol. 
*/ - if (lc_wcstombs (f_wctomb, charset, NULL, - _monetary_locale->wcurrency_symbol, + if (lc_wcstombs (f_wctomb, NULL, _monetary_locale->wcurrency_symbol, 0, true) == (size_t) -1) _monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol; else @@ -1026,10 +1017,10 @@ __set_lc_messages_from_win (const char *name, len += (strlen (charset) + 1); if (lcid) { - len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesexpr, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, msg->noexpr, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesstr, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, msg->nostr, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, msg->yesexpr, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, msg->noexpr, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, msg->yesstr, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, msg->nostr, 0) + 1; len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t); len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t); len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t); @@ -1051,13 +1042,13 @@ __set_lc_messages_from_win (const char *name, if (lcid) { _messages_locale->yesexpr = (const char *) c; - len = lc_wcstombs (f_wctomb, charset, c, msg->yesexpr, lc_messages_end - c); + len = lc_wcstombs (f_wctomb, c, msg->yesexpr, lc_messages_end - c); _messages_locale->noexpr = (const char *) (c += len + 1); - len = lc_wcstombs (f_wctomb, charset, c, msg->noexpr, lc_messages_end - c); + len = lc_wcstombs (f_wctomb, c, msg->noexpr, lc_messages_end - c); _messages_locale->yesstr = (const char *) (c += len + 1); - len = lc_wcstombs (f_wctomb, charset, c, msg->yesstr, lc_messages_end - c); + len = lc_wcstombs (f_wctomb, c, msg->yesstr, lc_messages_end - c); _messages_locale->nostr = (const char *) (c += len + 1); - len = lc_wcstombs (f_wctomb, charset, c, msg->nostr, lc_messages_end - c); + len = lc_wcstombs (f_wctomb, c, msg->nostr, lc_messages_end - c); c += len + 1; if ((uintptr_t) c % 1) ++c; @@ -1149,15 +1140,14 @@ 
strcoll (const char *__restrict s1, const char *__restrict s2) /* The ANSI version of CompareString uses the default charset of the lcid, so we must use the Unicode version. */ mbtowc_p collate_mbtowc = __get_current_collate_locale ()->mbtowc; - const char *collate_charset = __get_current_collate_locale ()->codeset; - n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1; + n1 = lc_mbstowcs (collate_mbtowc, NULL, s1, 0) + 1; ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t)) : tp.w_get ()); - lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1); - n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1; + lc_mbstowcs (collate_mbtowc, ws1, s1, n1); + n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1; ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t)) : tp.w_get ()); - lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2); + lc_mbstowcs (collate_mbtowc, ws2, s2, n2); ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1); if (n1 > NT_MAX_PATH) free (ws1); @@ -1226,13 +1216,12 @@ strxfrm (char *__restrict s1, const char *__restrict s2, size_t sn) /* The ANSI version of LCMapString uses the default charset of the lcid, so we must use the Unicode version. */ mbtowc_p collate_mbtowc = __get_current_collate_locale ()->mbtowc; - const char *collate_charset = __get_current_collate_locale ()->codeset; - n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1; + n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1; ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t)) : tp.w_get ()); if (ws2) { - lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2); + lc_mbstowcs (collate_mbtowc, ws2, s2, n2); /* The sort key is a NUL-terminated byte string. 
*/ ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, (PWCHAR) s1, sn); @@ -1474,7 +1463,7 @@ __set_locale_from_locale_alias (const char *locale, char *new_locale) if (strlen (replace) > ENCODING_LEN) continue; /* The file is latin1 encoded */ - lc_mbstowcs (__iso_mbtowc, "ISO-8859-1", walias, alias, ENCODING_LEN + 1); + lc_mbstowcs (__iso_mbtowc (1), walias, alias, ENCODING_LEN + 1); walias[ENCODING_LEN] = L'\0'; if (!wcscmp (wlocale, walias)) { @@ -1503,33 +1492,25 @@ internal_setlocale () wchar_t *w_path = NULL, *w_cwd; /* Don't do anything if the charset hasn't actually changed. */ - if (strcmp (cygheap->locale.charset, __locale_charset ()) == 0) + if (cygheap->locale.mbtowc == __global_locale.mbtowc) return; - debug_printf ("Cygwin charset changed from %s to %s", - cygheap->locale.charset, __locale_charset ()); + debug_printf ("Cygwin charset chang to %s", __locale_charset ()); /* Fetch PATH and CWD and convert to wchar_t in previous charset. */ path = getenv ("PATH"); if (path && *path) /* $PATH can be potentially unset. */ { w_path = tp.w_get (); - sys_mbstowcs (w_path, 32768, path); + sys_cp_mbstowcs (cygheap->locale.mbtowc, w_path, 32768, path); } w_cwd = tp.w_get (); cwdstuff::cwd_lock.acquire (); - sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ()); + sys_cp_mbstowcs (cygheap->locale.mbtowc, w_cwd, 32768, + cygheap->cwd.get_posix ()); /* Set charset for internal conversion functions. */ - if (*__locale_charset () == 'A'/*SCII*/) - { - cygheap->locale.mbtowc = __utf8_mbtowc; - cygheap->locale.wctomb = __utf8_wctomb; - } - else - { - cygheap->locale.mbtowc = __mbtowc; - cygheap->locale.wctomb = __wctomb; - } - strcpy (cygheap->locale.charset, __locale_charset ()); + cygheap->locale.mbtowc = __global_locale.mbtowc; + if (cygheap->locale.mbtowc == __ascii_mbtowc) + cygheap->locale.mbtowc = __utf8_mbtowc; /* Restore CWD and PATH in new charset. 
*/ cygheap->cwd.reset_posix (w_cwd); cwdstuff::cwd_lock.release (); diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 40f2c29..c962f7c 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -140,15 +140,13 @@ __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp) } extern "C" int -__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { return __db_wctomb (r,s, wchar, 932); } extern "C" int -__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { /* Unfortunately, the Windows eucJP codepage 20932 is not really 100% compatible to eucJP. It's a cute approximation which makes it a @@ -192,22 +190,19 @@ __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, } extern "C" int -__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { return __db_wctomb (r,s, wchar, 936); } extern "C" int -__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { return __db_wctomb (r,s, wchar, 949); } extern "C" int -__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { return __db_wctomb (r,s, wchar, 950); } @@ -268,14 +263,14 @@ __db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, UINT cp, extern "C" int __sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { return __db_mbtowc (r, pwc, s, n, 932, state); } extern "C" int __eucjp_mbtowc 
(struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { /* See comment in __eucjp_wctomb above. */ wchar_t dummy; @@ -352,21 +347,21 @@ jis_x_0212: extern "C" int __gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { return __db_mbtowc (r, pwc, s, n, 936, state); } extern "C" int __kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { return __db_mbtowc (r, pwc, s, n, 949, state); } extern "C" int __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { return __db_mbtowc (r, pwc, s, n, 950, state); } @@ -408,7 +403,7 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, */ static size_t __reg3 sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc, - bool is_path) + bool is_path) { char buf[10]; char *ptr = dst; @@ -416,9 +411,10 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc, size_t n = 0; mbstate_t ps; save_errno save; - wctomb_p f_wctomb = cygheap->locale.wctomb; - const char *charset = cygheap->locale.charset; + wctomb_p f_wctomb = __WCTOMB; + if (f_wctomb == __ascii_wctomb) + f_wctomb = __utf8_wctomb; memset (&ps, 0, sizeof ps); if (dst == NULL) len = (size_t) -1; @@ -441,13 +437,13 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc, } else { - bytes = f_wctomb (_REENT, buf, pw, charset, &ps); - if (bytes == -1 && *charset != 'U'/*TF-8*/) + bytes = f_wctomb (_REENT, buf, pw, &ps); + if (bytes == -1 && f_wctomb != __utf8_wctomb) { /* Convert chars invalid in the current codepage to a sequence ASCII CAN; UTF-8 representation of invalid char. 
*/ buf[0] = 0x18; /* ASCII CAN */ - bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps); + bytes = __utf8_wctomb (_REENT, buf + 1, pw, &ps); if (bytes == -1) { ++pwcs; @@ -465,8 +461,7 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc, ps.__count = 0; continue; } - bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, charset, - &ps); + bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, &ps); nwc--; } } @@ -557,8 +552,8 @@ sys_wcstombs_alloc_no_path (char **dst_p, int type, const wchar_t *src, charset, which is the charset returned by GetConsoleCP (). Most of the time this is used for box and line drawing characters. */ size_t __reg3 -sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, - size_t dlen, const char *src, size_t nms) +sys_cp_mbstowcs (mbtowc_p f_mbtowc, wchar_t *dst, size_t dlen, + const char *src, size_t nms) { wchar_t *ptr = dst; unsigned const char *pmbs = (unsigned const char *) src; @@ -581,10 +576,11 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, next byte must be a valid UTF-8 start byte. If the charset isn't UTF-8 anyway, try to convert the following bytes as UTF-8 sequence. */ - if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4 && *charset != 'U'/*TF-8*/) + if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4 + && f_mbtowc != __utf8_mbtowc) { bytes = __utf8_mbtowc (_REENT, ptr, (const char *) pmbs + 1, - nms - 1, charset, &ps); + nms - 1, &ps); if (bytes < 0) { /* Invalid UTF-8 sequence? Treat the ASCII CAN character as @@ -603,7 +599,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, wchar_t *ptr2 = dst ? 
ptr + 1 : NULL; int bytes2 = __utf8_mbtowc (_REENT, ptr2, (const char *) pmbs + bytes, - nms - bytes, charset, &ps); + nms - bytes, &ps); if (bytes2 < 0) memset (&ps, 0, sizeof ps); else @@ -625,7 +621,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, } } else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms, - charset, &ps)) < 0) + &ps)) < 0) { /* The technique is based on a discussion here: http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html @@ -668,8 +664,10 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, size_t __reg3 sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, size_t nms) { - return sys_cp_mbstowcs (cygheap->locale.mbtowc, cygheap->locale.charset, - dst, dlen, src, nms); + mbtowc_p f_mbtowc = __MBTOWC; + if (f_mbtowc == __ascii_mbtowc) + f_mbtowc = __utf8_mbtowc; + return sys_cp_mbstowcs (f_mbtowc, dst, dlen, src, nms); } /* Same as sys_wcstombs_alloc, just backwards. */ diff --git a/winsup/cygwin/wchar.h b/winsup/cygwin/wchar.h index 1bffd63..b3dacf3 100644 --- a/winsup/cygwin/wchar.h +++ b/winsup/cygwin/wchar.h @@ -11,6 +11,9 @@ details. 
*/ #include_next <wchar.h> +/* Internal headers from newlib */ +#include "../locale/setlocale.h" + #define ENCODING_LEN 31 #ifdef __cplusplus @@ -18,29 +21,23 @@ extern "C" { #endif typedef int mbtowc_f (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); + mbstate_t *); typedef mbtowc_f *mbtowc_p; -extern mbtowc_p __mbtowc; extern mbtowc_f __ascii_mbtowc; extern mbtowc_f __utf8_mbtowc; -extern mbtowc_f __iso_mbtowc; -extern mbtowc_f __cp_mbtowc; -extern mbtowc_f __sjis_mbtowc; -extern mbtowc_f __eucjp_mbtowc; -extern mbtowc_f __gbk_mbtowc; -extern mbtowc_f __kr_mbtowc; -extern mbtowc_f __big5_mbtowc; - -typedef int wctomb_f (struct _reent *, char *, wchar_t, const char *, - mbstate_t *); +extern mbtowc_p __iso_mbtowc (int); +extern mbtowc_p __cp_mbtowc (int); + +#define __MBTOWC (__get_current_locale ()->mbtowc) + +typedef int wctomb_f (struct _reent *, char *, wchar_t, mbstate_t *); typedef wctomb_f *wctomb_p; -extern wctomb_p __wctomb; extern wctomb_f __ascii_wctomb; extern wctomb_f __utf8_wctomb; -extern char *__locale_charset (); +#define __WCTOMB (__get_current_locale ()->wctomb) #ifdef __cplusplus } @@ -49,20 +46,21 @@ extern char *__locale_charset (); #ifdef __INSIDE_CYGWIN__ #ifdef __cplusplus size_t __reg3 sys_wcstombs (char *dst, size_t len, const wchar_t * src, - size_t nwc = (size_t) -1); + size_t nwc = (size_t) -1); size_t __reg3 sys_wcstombs_no_path (char *dst, size_t len, - const wchar_t * src, size_t nwc = (size_t) -1); + const wchar_t * src, + size_t nwc = (size_t) -1); size_t __reg3 sys_wcstombs_alloc (char **, int, const wchar_t *, - size_t = (size_t) -1); + size_t = (size_t) -1); size_t __reg3 sys_wcstombs_alloc_no_path (char **, int, const wchar_t *, - size_t = (size_t) -1); + size_t = (size_t) -1); -size_t __reg3 sys_cp_mbstowcs (mbtowc_p, const char *, wchar_t *, size_t, - const char *, size_t = (size_t) -1); +size_t __reg3 sys_cp_mbstowcs (mbtowc_p, wchar_t *, size_t, const char *, + size_t = (size_t) -1); size_t 
__reg3 sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, - size_t nms = (size_t) -1); + size_t nms = (size_t) -1); size_t __reg3 sys_mbstowcs_alloc (wchar_t **, int, const char *, - size_t = (size_t) -1); + size_t = (size_t) -1); #endif /* __cplusplus */ #endif /* __INSIDE_CYGWIN__ */ |