Consolidate wctomb/mbtowc calls for POSIX-1.2008

- Remove the charset parameter from the low-level __foo_wctomb/__foo_mbtowc calls. - Instead, create arrays of functions for the ISO and Windows codepages, so that each entry points to a function which does not need to evaluate the charset string on each call. Create matching helper functions, i.e., __iso_wctomb, __iso_mbtowc, __cp_wctomb and __cp_mbtowc are now functions returning the right function pointer. - Create __WCTOMB/__MBTOWC macros utilizing the per-reent locale and replace calls to __wctomb/__mbtowc with calls to __WCTOMB/__MBTOWC. - Drop the global __wctomb/__mbtowc vars. - Utilize the aforementioned changes in Cygwin to get rid of charset in other calling functions and simplify the code. - In Cygwin, restrict the global cygheap locale info to the job performed by internal_setlocale. Use UTF-8 instead of ASCII on the fly in the internal conversion functions. - In Cygwin dll_entry, make sure to initialize a TLS area with a NULL _REENT->_locale pointer. Add a comment to explain why. Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
author: Corinna Vinschen <corinna@vinschen.de> 2016-07-20 22:05:59 +0200
committer: Corinna Vinschen <corinna@vinschen.de> 2016-08-15 10:56:57 +0200
commit: d16a56306d63b4d94412b479a8ea83463a3514ab (patch)
tree: b37c2954976f314628637e660e039f1be4034b1c /winsup
parent: 88208d3735821df0f5a66c5f8781282a7f5bf284 (diff)
download: newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.zip
newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.tar.gz
newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.tar.bz2
8 files changed, 109 insertions, 140 deletions
diff --git a/winsup/cygwin/cygheap.cc b/winsup/cygwin/cygheap.cc
index 11f868f..87a5eb9 100644
--- a/winsup/cygwin/cygheap.cc
+++ b/winsup/cygwin/cygheap.cc
@@ -28,7 +28,7 @@
 
 static mini_cygheap NO_COPY cygheap_dummy =
 {
-  {__utf8_mbtowc, __utf8_wctomb}
+  {__utf8_mbtowc}
 };
 
 init_cygheap NO_COPY *cygheap = (init_cygheap *) &cygheap_dummy;
@@ -245,8 +245,6 @@ cygheap_init ()
 	cygheap->bucket_val[b] = sz[b & 1];
       /* Default locale settings. */
       cygheap->locale.mbtowc = __utf8_mbtowc;
-      cygheap->locale.wctomb = __utf8_wctomb;
-      strcpy (cygheap->locale.charset, "UTF-8");
       /* Set umask to a sane default. */
       cygheap->umask = 022;
       cygheap->rlim_core = RLIM_INFINITY;
diff --git a/winsup/cygwin/cygheap.h b/winsup/cygwin/cygheap.h
index e2807e2..abbf9ec 100644
--- a/winsup/cygwin/cygheap.h
+++ b/winsup/cygwin/cygheap.h
@@ -346,8 +346,6 @@ struct cygheap_debug
 struct cygheap_locale
 {
   mbtowc_p mbtowc;
-  wctomb_p wctomb;
-  char charset[ENCODING_LEN + 1];
 };
 
 struct user_heap_info
diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h
index 3321523..c7db8f8 100644
--- a/winsup/cygwin/fhandler.h
+++ b/winsup/cygwin/fhandler.h
@@ -1355,7 +1355,7 @@ class dev_console
 
   inline UINT get_console_cp ();
   DWORD con_to_str (char *d, int dlen, WCHAR w);
-  DWORD str_to_con (mbtowc_p, const char *, PWCHAR d, const char *s, DWORD sz);
+  DWORD str_to_con (mbtowc_p, PWCHAR d, const char *s, DWORD sz);
   void set_color (HANDLE);
   void set_default_attr ();
   int set_cl_x (cltype);
diff --git a/winsup/cygwin/fhandler_console.cc b/winsup/cygwin/fhandler_console.cc
index 76aff0f..45fe882 100644
--- a/winsup/cygwin/fhandler_console.cc
+++ b/winsup/cygwin/fhandler_console.cc
@@ -225,10 +225,9 @@ dev_console::get_console_cp ()
 }
 
 inline DWORD
-dev_console::str_to_con (mbtowc_p f_mbtowc, const char *charset,
-			 PWCHAR d, const char *s, DWORD sz)
+dev_console::str_to_con (mbtowc_p f_mbtowc, PWCHAR d, const char *s, DWORD sz)
 {
-  return sys_cp_mbstowcs (f_mbtowc, charset, d, CONVERT_LIMIT, s, sz);
+  return sys_cp_mbstowcs (f_mbtowc, d, CONVERT_LIMIT, s, sz);
 }
 
 bool
@@ -2002,21 +2001,10 @@ fhandler_console::write_normal (const unsigned char *src,
   const unsigned char *found = src;
   size_t ret;
   mbstate_t ps;
-  UINT cp = con.get_console_cp ();
-  const char *charset;
   mbtowc_p f_mbtowc;
 
-  if (cp)
-    {
-      /* The alternate charset is always 437, just as in the Linux console. */
-      f_mbtowc = __cp_mbtowc;
-      charset = "CP437";
-    }
-  else
-    {
-      f_mbtowc = cygheap->locale.mbtowc;
-      charset = cygheap->locale.charset;
-    }
+  /* The alternate charset is always 437, just as in the Linux console. */
+  f_mbtowc = con.get_console_cp () ? __cp_mbtowc (437) : __MBTOWC;
 
   /* First check if we have cached lead bytes of a former try to write
      a truncated multibyte sequence.  If so, process it. */
@@ -2027,7 +2015,7 @@ fhandler_console::write_normal (const unsigned char *src,
       memcpy (trunc_buf.buf + trunc_buf.len, src, cp_len);
       memset (&ps, 0, sizeof ps);
       switch (ret = f_mbtowc (_REENT, NULL, (const char *) trunc_buf.buf,
-			       trunc_buf.len + cp_len, charset, &ps))
+			       trunc_buf.len + cp_len, &ps))
 	{
 	case -2:
 	  /* Still truncated multibyte sequence?  Keep in trunc_buf. */
@@ -2052,9 +2040,9 @@ fhandler_console::write_normal (const unsigned char *src,
       /* Valid multibyte sequence?  Process. */
       if (nfound)
 	{
-	  buf_len = con.str_to_con (f_mbtowc, charset, write_buf,
-					   (const char *) trunc_buf.buf,
-					   nfound - trunc_buf.buf);
+	  buf_len = con.str_to_con (f_mbtowc, write_buf,
+				    (const char *) trunc_buf.buf,
+				    nfound - trunc_buf.buf);
 	  if (!write_console (write_buf, buf_len, done))
 	    {
 	      debug_printf ("multibyte sequence write failed, handle %p", get_output_handle ());
@@ -2075,7 +2063,7 @@ fhandler_console::write_normal (const unsigned char *src,
 	 && base_chars[*found] == NOR)
     {
       switch (ret = f_mbtowc (_REENT, NULL, (const char *) found,
-			       end - found, charset, &ps))
+			       end - found, &ps))
 	{
 	case -2: /* Truncated multibyte sequence.  Store for next write. */
 	  trunc_buf.len = end - found;
@@ -2098,8 +2086,7 @@ do_print:
   if (found != src)
     {
       DWORD len = found - src;
-      buf_len = con.str_to_con (f_mbtowc, charset, write_buf,
-				       (const char *) src, len);
+      buf_len = con.str_to_con (f_mbtowc, write_buf, (const char *) src, len);
       if (!buf_len)
 	{
 	  debug_printf ("conversion error, handle %p",
@@ -2178,7 +2165,7 @@ do_print:
 	      if (found + 1 < end)
 		{
 		  ret = __utf8_mbtowc (_REENT, NULL, (const char *) found + 1,
-				       end - found - 1, NULL, &ps);
+				       end - found - 1, &ps);
 		  if (ret != (size_t) -1)
 		    while (ret-- > 0)
 		      {
diff --git a/winsup/cygwin/init.cc b/winsup/cygwin/init.cc
index 7285e3d..1728105 100644
--- a/winsup/cygwin/init.cc
+++ b/winsup/cygwin/init.cc
@@ -83,6 +83,15 @@ dll_entry (HANDLE h, DWORD reason, void *static_load)
       cygwin_hmodule = (HMODULE) h;
       dynamically_loaded = (static_load == NULL);
 
+      /* Starting with adding the POSIX-1.2008 per-thread locale functionality,
+	 we need an initialized _REENT area even for the functions called from
+	 dll_crt0_0.  In fact, we only need the _REENT->_locale pointer
+	 initialized to NULL, so subsequent calls to locale-specific functions
+	 will always fall back to __global_locale, rather than crash due to
+	 _REENT->_locale having an arbitrary value. */
+      (void) alloca (CYGTLS_PADSIZE);
+      _REENT->_locale = NULL;
+
       dll_crt0_0 ();
       _my_oldfunc = TlsAlloc ();
       dll_finished_loading = true;
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
index 6dde251..2ba9f32 100644
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -16,8 +16,6 @@ details. */
 #include "dtable.h"
 #include "cygheap.h"
 #include "tls_pbuf.h"
-/* Internal headers from newlib */
-#include "../locale/setlocale.h"
 #include "lc_msg.h"
 #include "lc_era.h"
 
@@ -31,8 +29,7 @@ details. */
 	    __eval_datetimefmt(lcid,(type),(flags),&lc_time_ptr,\
 			       lc_time_end-lc_time_ptr)
 #define charfromwchar(category,in) \
-	    __charfromwchar (_##category##_locale->in,_LC(category),\
-			     f_wctomb,charset)
+	    __charfromwchar (_##category##_locale->in,_LC(category),f_wctomb)
 
 #define has_modifier(x)	((x)[0] && !strcmp (modifier, (x)))
 
@@ -159,8 +156,7 @@ __get_lcid_from_locale (const char *name)
    is set, s==NULL returns -1 since then it's used to recognize invalid strings
    in the used charset. */
 static size_t
-lc_wcstombs (wctomb_p f_wctomb, const char *charset,
-	     char *s, const wchar_t *pwcs, size_t n,
+lc_wcstombs (wctomb_p f_wctomb, char *s, const wchar_t *pwcs, size_t n,
 	     bool return_invalid = false)
 {
   char *ptr = s;
@@ -175,7 +171,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset,
       size_t num_bytes = 0;
       while (*pwcs != 0)
 	{
-	  bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state);
+	  bytes = f_wctomb (_REENT, buf, *pwcs++, &state);
 	  if (bytes != (size_t) -1)
 	    num_bytes += bytes;
 	  else if (return_invalid)
@@ -185,7 +181,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset,
     }
   while (n > 0)
     {
-      bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state);
+      bytes = f_wctomb (_REENT, buf, *pwcs, &state);
       if (bytes == (size_t) -1)
 	{
 	  memset (&state, 0, sizeof state);
@@ -207,8 +203,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset,
 /* Never returns -1.  Invalid sequences are translated to replacement
    wide-chars. */
 static size_t
-lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset,
-	     wchar_t *pwcs, const char *s, size_t n)
+lc_mbstowcs (mbtowc_p f_mbtowc, wchar_t *pwcs, const char *s, size_t n)
 {
   size_t ret = 0;
   char *t = (char *) s;
@@ -220,8 +215,7 @@ lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset,
     n = 1;
   while (n > 0)
     {
-      bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */,
-			charset, &state);
+      bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */, &state);
       if (bytes == (size_t) -1)
 	{
 	  state.__count = 0;
@@ -294,13 +288,12 @@ __setlocaleinfo (char **ptr, size_t size, wchar_t val)
 }
 
 static char *
-__charfromwchar (const wchar_t *in, char **ptr, size_t size,
-		 wctomb_p f_wctomb, const char *charset)
+__charfromwchar (const wchar_t *in, char **ptr, size_t size, wctomb_p f_wctomb)
 {
   size_t num;
   char *ret;
 
-  num = lc_wcstombs (f_wctomb, charset, ret = *ptr, in, size);
+  num = lc_wcstombs (f_wctomb, ret = *ptr, in, size);
   *ptr += num + 1;
   return ret;
 }
@@ -600,11 +593,11 @@ __set_lc_time_from_win (const char *name,
 	  /* Evaluate string length in target charset.  Characters invalid in the
 	     target charset are simply ignored, as on Linux. */
 	  len = 0;
-	  len += lc_wcstombs (f_wctomb, charset, NULL, era->era, 0) + 1;
-	  len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_fmt, 0) + 1;
-	  len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_t_fmt, 0) + 1;
-	  len += lc_wcstombs (f_wctomb, charset, NULL, era->era_t_fmt, 0) + 1;
-	  len += lc_wcstombs (f_wctomb, charset, NULL, era->alt_digits, 0) + 1;
+	  len += lc_wcstombs (f_wctomb, NULL, era->era, 0) + 1;
+	  len += lc_wcstombs (f_wctomb, NULL, era->era_d_fmt, 0) + 1;
+	  len += lc_wcstombs (f_wctomb, NULL, era->era_d_t_fmt, 0) + 1;
+	  len += lc_wcstombs (f_wctomb, NULL, era->era_t_fmt, 0) + 1;
+	  len += lc_wcstombs (f_wctomb, NULL, era->alt_digits, 0) + 1;
 	  len += (wcslen (era->era) + 1) * sizeof (wchar_t);
 	  len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t);
 	  len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t);
@@ -742,8 +735,7 @@ __set_lc_ctype_from_win (const char *name,
 	  lc_ctype_ptr = (char *) woutdig;
 	  _ctype_locale->outdigits[i] = lc_ctype_ptr;
 	  memset (&state, 0, sizeof state);
-	  lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], charset,
-				      &state);
+	  lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], &state);
 	  *lc_ctype_ptr++ = '\0';
 	}
     }
@@ -885,8 +877,7 @@ __set_lc_monetary_from_win (const char *name,
 							  LOCALE_SCURRENCY);
       /* As on Linux:  If the currency_symbol can't be represented in the
 	 given charset, use int_curr_symbol. */
-      if (lc_wcstombs (f_wctomb, charset, NULL,
-		       _monetary_locale->wcurrency_symbol,
+      if (lc_wcstombs (f_wctomb, NULL, _monetary_locale->wcurrency_symbol,
 		       0, true) == (size_t) -1)
 	_monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol;
       else
@@ -1026,10 +1017,10 @@ __set_lc_messages_from_win (const char *name,
   len += (strlen (charset) + 1);
   if (lcid)
     {
-      len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesexpr, 0) + 1;
-      len += lc_wcstombs (f_wctomb, charset, NULL, msg->noexpr, 0) + 1;
-      len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesstr, 0) + 1;
-      len += lc_wcstombs (f_wctomb, charset, NULL, msg->nostr, 0) + 1;
+      len += lc_wcstombs (f_wctomb, NULL, msg->yesexpr, 0) + 1;
+      len += lc_wcstombs (f_wctomb, NULL, msg->noexpr, 0) + 1;
+      len += lc_wcstombs (f_wctomb, NULL, msg->yesstr, 0) + 1;
+      len += lc_wcstombs (f_wctomb, NULL, msg->nostr, 0) + 1;
       len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t);
       len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t);
       len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t);
@@ -1051,13 +1042,13 @@ __set_lc_messages_from_win (const char *name,
   if (lcid)
     {
       _messages_locale->yesexpr = (const char *) c;
-      len = lc_wcstombs (f_wctomb, charset, c, msg->yesexpr, lc_messages_end - c);
+      len = lc_wcstombs (f_wctomb, c, msg->yesexpr, lc_messages_end - c);
       _messages_locale->noexpr = (const char *) (c += len + 1);
-      len = lc_wcstombs (f_wctomb, charset, c, msg->noexpr, lc_messages_end - c);
+      len = lc_wcstombs (f_wctomb, c, msg->noexpr, lc_messages_end - c);
       _messages_locale->yesstr = (const char *) (c += len + 1);
-      len = lc_wcstombs (f_wctomb, charset, c, msg->yesstr, lc_messages_end - c);
+      len = lc_wcstombs (f_wctomb, c, msg->yesstr, lc_messages_end - c);
       _messages_locale->nostr = (const char *) (c += len + 1);
-      len = lc_wcstombs (f_wctomb, charset, c, msg->nostr, lc_messages_end - c);
+      len = lc_wcstombs (f_wctomb, c, msg->nostr, lc_messages_end - c);
       c += len + 1;
       if ((uintptr_t) c % 1)
 	++c;
@@ -1149,15 +1140,14 @@ strcoll (const char *__restrict s1, const char *__restrict s2)
   /* The ANSI version of CompareString uses the default charset of the lcid,
      so we must use the Unicode version. */
   mbtowc_p collate_mbtowc = __get_current_collate_locale ()->mbtowc;
-  const char *collate_charset = __get_current_collate_locale ()->codeset;
-  n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1;
+  n1 = lc_mbstowcs (collate_mbtowc, NULL, s1, 0) + 1;
   ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
 			  : tp.w_get ());
-  lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1);
-  n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
+  lc_mbstowcs (collate_mbtowc, ws1, s1, n1);
+  n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
   ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
 			  : tp.w_get ());
-  lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
+  lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
   ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
   if (n1 > NT_MAX_PATH)
     free (ws1);
@@ -1226,13 +1216,12 @@ strxfrm (char *__restrict s1, const char *__restrict s2, size_t sn)
   /* The ANSI version of LCMapString uses the default charset of the lcid,
      so we must use the Unicode version. */
   mbtowc_p collate_mbtowc = __get_current_collate_locale ()->mbtowc;
-  const char *collate_charset = __get_current_collate_locale ()->codeset;
-  n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
+  n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
   ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
 			  : tp.w_get ());
   if (ws2)
     {
-      lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
+      lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
       /* The sort key is a NUL-terminated byte string. */
       ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1,
 			  (PWCHAR) s1, sn);
@@ -1474,7 +1463,7 @@ __set_locale_from_locale_alias (const char *locale, char *new_locale)
       if (strlen (replace) > ENCODING_LEN)
 	continue;
       /* The file is latin1 encoded */
-      lc_mbstowcs (__iso_mbtowc, "ISO-8859-1", walias, alias, ENCODING_LEN + 1);
+      lc_mbstowcs (__iso_mbtowc (1), walias, alias, ENCODING_LEN + 1);
       walias[ENCODING_LEN] = L'\0';
       if (!wcscmp (wlocale, walias))
 	{
@@ -1503,33 +1492,25 @@ internal_setlocale ()
   wchar_t *w_path = NULL, *w_cwd;
 
   /* Don't do anything if the charset hasn't actually changed. */
-  if (strcmp (cygheap->locale.charset, __locale_charset ()) == 0)
+  if (cygheap->locale.mbtowc == __global_locale.mbtowc)
     return;
 
-  debug_printf ("Cygwin charset changed from %s to %s",
-		cygheap->locale.charset, __locale_charset ());
+  debug_printf ("Cygwin charset chang to %s", __locale_charset ());
   /* Fetch PATH and CWD and convert to wchar_t in previous charset. */
   path = getenv ("PATH");
   if (path && *path)	/* $PATH can be potentially unset. */
     {
       w_path = tp.w_get ();
-      sys_mbstowcs (w_path, 32768, path);
+      sys_cp_mbstowcs (cygheap->locale.mbtowc, w_path, 32768, path);
     }
   w_cwd = tp.w_get ();
   cwdstuff::cwd_lock.acquire ();
-  sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
+  sys_cp_mbstowcs (cygheap->locale.mbtowc, w_cwd, 32768,
+		   cygheap->cwd.get_posix ());
   /* Set charset for internal conversion functions. */
-  if (*__locale_charset () == 'A'/*SCII*/)
-    {
-      cygheap->locale.mbtowc = __utf8_mbtowc;
-      cygheap->locale.wctomb = __utf8_wctomb;
-    }
-  else
-    {
-      cygheap->locale.mbtowc = __mbtowc;
-      cygheap->locale.wctomb = __wctomb;
-    }
-  strcpy (cygheap->locale.charset, __locale_charset ());
+  cygheap->locale.mbtowc = __global_locale.mbtowc;
+  if (cygheap->locale.mbtowc == __ascii_mbtowc)
+    cygheap->locale.mbtowc = __utf8_mbtowc;
   /* Restore CWD and PATH in new charset. */
   cygheap->cwd.reset_posix (w_cwd);
   cwdstuff::cwd_lock.release ();
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 40f2c29..c962f7c 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -140,15 +140,13 @@ __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
 }
 
 extern "C" int
-__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
-	       mbstate_t *state)
+__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 {
   return __db_wctomb (r,s, wchar, 932);
 }
 
 extern "C" int
-__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
-	       mbstate_t *state)
+__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 {
   /* Unfortunately, the Windows eucJP codepage 20932 is not really 100%
      compatible to eucJP.  It's a cute approximation which makes it a
@@ -192,22 +190,19 @@ __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
 }
 
 extern "C" int
-__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
-	       mbstate_t *state)
+__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 {
   return __db_wctomb (r,s, wchar, 936);
 }
 
 extern "C" int
-__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
-	       mbstate_t *state)
+__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 {
   return __db_wctomb (r,s, wchar, 949);
 }
 
 extern "C" int
-__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
-	       mbstate_t *state)
+__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 {
   return __db_wctomb (r,s, wchar, 950);
 }
@@ -268,14 +263,14 @@ __db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, UINT cp,
 
 extern "C" int
 __sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
-	       const char *charset, mbstate_t *state)
+	       mbstate_t *state)
 {
   return __db_mbtowc (r, pwc, s, n, 932, state);
 }
 
 extern "C" int
 __eucjp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
-		const char *charset, mbstate_t *state)
+		mbstate_t *state)
 {
   /* See comment in __eucjp_wctomb above. */
   wchar_t dummy;
@@ -352,21 +347,21 @@ jis_x_0212:
 
 extern "C" int
 __gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
-	       const char *charset, mbstate_t *state)
+	      mbstate_t *state)
 {
   return __db_mbtowc (r, pwc, s, n, 936, state);
 }
 
 extern "C" int
 __kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
-	       const char *charset, mbstate_t *state)
+	     mbstate_t *state)
 {
   return __db_mbtowc (r, pwc, s, n, 949, state);
 }
 
 extern "C" int
 __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
-	       const char *charset, mbstate_t *state)
+	       mbstate_t *state)
 {
   return __db_mbtowc (r, pwc, s, n, 950, state);
 }
@@ -408,7 +403,7 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 */
 static size_t __reg3
 sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
-		bool is_path)
+	      bool is_path)
 {
   char buf[10];
   char *ptr = dst;
@@ -416,9 +411,10 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
   size_t n = 0;
   mbstate_t ps;
   save_errno save;
-  wctomb_p f_wctomb = cygheap->locale.wctomb;
-  const char *charset = cygheap->locale.charset;
+  wctomb_p f_wctomb = __WCTOMB;
 
+  if (f_wctomb == __ascii_wctomb)
+    f_wctomb = __utf8_wctomb;
   memset (&ps, 0, sizeof ps);
   if (dst == NULL)
     len = (size_t) -1;
@@ -441,13 +437,13 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
 	}
       else
 	{
-	  bytes = f_wctomb (_REENT, buf, pw, charset, &ps);
-	  if (bytes == -1 && *charset != 'U'/*TF-8*/)
+	  bytes = f_wctomb (_REENT, buf, pw, &ps);
+	  if (bytes == -1 && f_wctomb != __utf8_wctomb)
 	    {
 	      /* Convert chars invalid in the current codepage to a sequence
 		 ASCII CAN; UTF-8 representation of invalid char. */
 	      buf[0] = 0x18; /* ASCII CAN */
-	      bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps);
+	      bytes = __utf8_wctomb (_REENT, buf + 1, pw, &ps);
 	      if (bytes == -1)
 		{
 		  ++pwcs;
@@ -465,8 +461,7 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
 		      ps.__count = 0;
 		      continue;
 		    }
-		  bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, charset,
-					  &ps);
+		  bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, &ps);
 		  nwc--;
 		}
 	    }
@@ -557,8 +552,8 @@ sys_wcstombs_alloc_no_path (char **dst_p, int type, const wchar_t *src,
    charset, which is the charset returned by GetConsoleCP ().  Most of the
    time this is used for box and line drawing characters. */
 size_t __reg3
-sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
-		 size_t dlen, const char *src, size_t nms)
+sys_cp_mbstowcs (mbtowc_p f_mbtowc, wchar_t *dst, size_t dlen,
+		 const char *src, size_t nms)
 {
   wchar_t *ptr = dst;
   unsigned const char *pmbs = (unsigned const char *) src;
@@ -581,10 +576,11 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
 	     next byte must be a valid UTF-8 start byte.  If the charset
 	     isn't UTF-8 anyway, try to convert the following bytes as UTF-8
 	     sequence. */
-	  if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4 && *charset != 'U'/*TF-8*/)
+	  if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4
+	      && f_mbtowc != __utf8_mbtowc)
 	    {
 	      bytes = __utf8_mbtowc (_REENT, ptr, (const char *) pmbs + 1,
-				     nms - 1, charset, &ps);
+				     nms - 1, &ps);
 	      if (bytes < 0)
 		{
 		  /* Invalid UTF-8 sequence?  Treat the ASCII CAN character as
@@ -603,7 +599,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
 		      wchar_t *ptr2 = dst ? ptr + 1 : NULL;
 		      int bytes2 = __utf8_mbtowc (_REENT, ptr2,
 						  (const char *) pmbs + bytes,
-						  nms - bytes, charset, &ps);
+						  nms - bytes, &ps);
 		      if (bytes2 < 0)
 			memset (&ps, 0, sizeof ps);
 		      else
@@ -625,7 +621,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
 	    }
 	}
       else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms,
-				  charset, &ps)) < 0)
+				  &ps)) < 0)
 	{
 	  /* The technique is based on a discussion here:
 	     http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html
@@ -668,8 +664,10 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
 size_t __reg3
 sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, size_t nms)
 {
-  return sys_cp_mbstowcs (cygheap->locale.mbtowc, cygheap->locale.charset,
-			  dst, dlen, src, nms);
+  mbtowc_p f_mbtowc = __MBTOWC;
+  if (f_mbtowc == __ascii_mbtowc)
+    f_mbtowc = __utf8_mbtowc;
+  return sys_cp_mbstowcs (f_mbtowc, dst, dlen, src, nms);
 }
 
 /* Same as sys_wcstombs_alloc, just backwards. */
diff --git a/winsup/cygwin/wchar.h b/winsup/cygwin/wchar.h
index 1bffd63..b3dacf3 100644
--- a/winsup/cygwin/wchar.h
+++ b/winsup/cygwin/wchar.h
@@ -11,6 +11,9 @@ details. */
 
 #include_next <wchar.h>
 
+/* Internal headers from newlib */
+#include "../locale/setlocale.h"
+
 #define ENCODING_LEN 31
 
 #ifdef __cplusplus
@@ -18,29 +21,23 @@ extern "C" {
 #endif
 
 typedef int mbtowc_f (struct _reent *, wchar_t *, const char *, size_t,
-		      const char *, mbstate_t *);
+		      mbstate_t *);
 typedef mbtowc_f *mbtowc_p;
 
-extern mbtowc_p __mbtowc;
 extern mbtowc_f __ascii_mbtowc;
 extern mbtowc_f __utf8_mbtowc;
-extern mbtowc_f __iso_mbtowc;
-extern mbtowc_f __cp_mbtowc;
-extern mbtowc_f __sjis_mbtowc;
-extern mbtowc_f __eucjp_mbtowc;
-extern mbtowc_f __gbk_mbtowc;
-extern mbtowc_f __kr_mbtowc;
-extern mbtowc_f __big5_mbtowc;
-
-typedef int wctomb_f (struct _reent *, char *, wchar_t, const char *,
-		      mbstate_t *);
+extern mbtowc_p __iso_mbtowc (int);
+extern mbtowc_p __cp_mbtowc (int);
+
+#define __MBTOWC (__get_current_locale ()->mbtowc)
+
+typedef int wctomb_f (struct _reent *, char *, wchar_t, mbstate_t *);
 typedef wctomb_f *wctomb_p;
 
-extern wctomb_p __wctomb;
 extern wctomb_f __ascii_wctomb;
 extern wctomb_f __utf8_wctomb;
 
-extern char *__locale_charset ();
+#define __WCTOMB (__get_current_locale ()->wctomb)
 
 #ifdef __cplusplus
 }
@@ -49,20 +46,21 @@ extern char *__locale_charset ();
 #ifdef __INSIDE_CYGWIN__
 #ifdef __cplusplus
 size_t __reg3 sys_wcstombs (char *dst, size_t len, const wchar_t * src,
-			       size_t nwc = (size_t) -1);
+			    size_t nwc = (size_t) -1);
 size_t __reg3 sys_wcstombs_no_path (char *dst, size_t len,
-			       const wchar_t * src, size_t nwc = (size_t) -1);
+				    const wchar_t * src,
+				    size_t nwc = (size_t) -1);
 size_t __reg3 sys_wcstombs_alloc (char **, int, const wchar_t *,
-				     size_t = (size_t) -1);
+				  size_t = (size_t) -1);
 size_t __reg3 sys_wcstombs_alloc_no_path (char **, int, const wchar_t *,
-				     size_t = (size_t) -1);
+					  size_t = (size_t) -1);
 
-size_t __reg3 sys_cp_mbstowcs (mbtowc_p, const char *, wchar_t *, size_t,
-				  const char *, size_t = (size_t) -1);
+size_t __reg3 sys_cp_mbstowcs (mbtowc_p, wchar_t *, size_t, const char *,
+			       size_t = (size_t) -1);
 size_t __reg3 sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src,
-		     size_t nms = (size_t) -1);
+			    size_t nms = (size_t) -1);
 size_t __reg3 sys_mbstowcs_alloc (wchar_t **, int, const char *,
-				     size_t = (size_t) -1);
+				  size_t = (size_t) -1);
 #endif /* __cplusplus */
 #endif /* __INSIDE_CYGWIN__ */
author	Corinna Vinschen <corinna@vinschen.de>	2016-07-20 22:05:59 +0200
committer	Corinna Vinschen <corinna@vinschen.de>	2016-08-15 10:56:57 +0200
commit	d16a56306d63b4d94412b479a8ea83463a3514ab (patch)
tree	b37c2954976f314628637e660e039f1be4034b1c /winsup
parent	88208d3735821df0f5a66c5f8781282a7f5bf284 (diff)
download	newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.zip newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.tar.gz newlib-d16a56306d63b4d94412b479a8ea83463a3514ab.tar.bz2