From 62755474e58f3238fb1c672dcbcf91182481c23f Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Tue, 7 Apr 2009 12:13:37 +0000 Subject: * fhandler.h (class dev_console): Add members con_mbtowc, con_wctomb, and con_charset. (dev_console::str_to_con): Take mbtowc function pointer and charset as additional parameters. * fhandler_console.cc (fhandler_console::get_tty_stuff): Initialize aforementioned new members. Explain why. (dev_console::con_to_str): Remove useless comment. Call new sys_cp_wcstombs function rather than sys_wcstombs. (dev_console::str_to_con): Take mbtowc function pointer and charset as additional parameters. Call sys_cp_mbstowcs accordingly. (fhandler_console::write_normal): Only initialize f_mbtowc and charset once. Accommodate changed str_to_con. * strfuncs.cc (sys_cp_wcstombs): Renamed from sys_wcstombs. Take wctomb function pointer and charset as parameters. Use throughout. (sys_cp_mbstowcs): Take mbtowc function pointer and charset as parameters instead of codepage. Remove matching local variables and their initialization. * wchar.h (ENCODING_LEN): Define as in newlib. (__mbtowc): Use mbtowc_p typedef for declaration. (wctomb_f): New type. (wctomb_p): New type. (__wctomb): Declare. (__utf8_wctomb): Use wctomb_f typedef for declaration. (sys_cp_wcstombs): Move declaration from winsup.h here. (sys_wcstombs): Ditto. (sys_wcstombs_alloc): Ditto. (sys_cp_mbstowcs): Ditto. (sys_mbstowcs): Ditto. (sys_mbstowcs_alloc): Ditto. * winsup.h: Move declaration of sys_FOO functions to wchar.h. Include wchar.h instead. 
--- winsup/cygwin/ChangeLog | 34 ++++++++++++++++++++++++++++ winsup/cygwin/fhandler.h | 6 ++++- winsup/cygwin/fhandler_console.cc | 47 +++++++++++++++++++++++++++++---------- winsup/cygwin/strfuncs.cc | 26 +++++++++------------- winsup/cygwin/wchar.h | 42 +++++++++++++++++++++++++++++----- winsup/cygwin/winsup.h | 18 ++------------- 6 files changed, 123 insertions(+), 50 deletions(-) diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index 31ba03b..42927ad 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,37 @@ +2009-04-07 Corinna Vinschen + + * fhandler.h (class dev_console): Add members con_mbtowc, con_wctomb, + and con_charset. + (dev_console::str_to_con): Take mbtowc function pointer and charset + as additional parameters. + * fhandler_console.cc (fhandler_console::get_tty_stuff): Initialize + aforementioned new members. Explain why. + (dev_console::con_to_str): Remove useless comment. Call new + sys_cp_wcstombs function rather than sys_wcstombs. + (dev_console::str_to_con): Take mbtowc function pointer and charset + as additional parameters. Call sys_cp_mbstowcs accordingly. + (fhandler_console::write_normal): Only initialize f_mbtowc and charset + once. Accommodate changed str_to_con. + * strfuncs.cc (sys_cp_wcstombs): Renamed from sys_wcstombs. Take + wctomb function pointer and charset as parameters. Use throughout. + (sys_cp_mbstowcs): Take wctomb function pointer and charset as + parameters instead of codepage. Remove matching local variables and + their initialization. + * wchar.h (ENCODING_LEN): Define as in newlib. + (__mbtowc): Use mbtowc_p typedef for declaration. + (wctomb_f): New type. + (wctomb_p): New type. + (__wctomb): Declare. + (__utf8_wctomb): Use wctomb_f typedef for declaration. + (sys_cp_wcstombs): Move declaration from winsup.h here. + (sys_wcstombs): Ditto. + (sys_wcstombs_alloc): Ditto. + (sys_cp_mbstowcs): Ditto. + (sys_mbstowcs): Ditto. + (sys_mbstowcs_alloc): Ditto. 
+ * winsup.h: Move declaration of sys_FOO functions to wchar.h. Include + wchar.h instead. + 2009-04-06 Earl Chew * libc/rexec.cc (ruserpass): Use fstat64 instead of fstat. diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h index 7dff68d..be4c080 100644 --- a/winsup/cygwin/fhandler.h +++ b/winsup/cygwin/fhandler.h @@ -894,9 +894,13 @@ class dev_console bool use_mouse; bool raw_win32_keyboard_mode; + mbtowc_p con_mbtowc; + wctomb_p con_wctomb; + char con_charset[ENCODING_LEN + 1]; + inline UINT get_console_cp (); DWORD con_to_str (char *d, int dlen, WCHAR w); - DWORD str_to_con (PWCHAR d, const char *s, DWORD sz); + DWORD str_to_con (mbtowc_p, char *, PWCHAR d, const char *s, DWORD sz); void set_color (HANDLE); bool fillin_info (HANDLE); void set_default_attr (); diff --git a/winsup/cygwin/fhandler_console.cc b/winsup/cygwin/fhandler_console.cc index 976affc..88c4d2c 100644 --- a/winsup/cygwin/fhandler_console.cc +++ b/winsup/cygwin/fhandler_console.cc @@ -1,7 +1,7 @@ /* fhandler_console.cc Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2008 Red Hat, Inc. + 2006, 2008, 2009 Red Hat, Inc. This file is part of Cygwin. @@ -97,6 +97,25 @@ fhandler_console::get_tty_stuff (int flags = 0) dev_state->meta_mask |= RIGHT_ALT_PRESSED; dev_state->set_default_attr (); shared_console_info->tty_min_state.sethwnd ((HWND) INVALID_HANDLE_VALUE); + + /* Set the console charset and the mb<->wc conversion functions from + the current locale the first time the shared console info is created. + When this initialization is called, the current locale is the one + used when reading the environment. This way we get a console setting + which matches the setting of LC_ALL/LC_CTYPE/LANG at the time the + first Cygwin process in this console starts. + + This has an interesting effect. 
If none of the above environment + variables is set, the setting is equivalent to before when + CYGWIN=codepage was not set: The console charset will be the + default ANSI codepage. So it's sort of backward compatible. + + TODO: Find out if that's a feasible approach. It might be better + in the long run to have a distinct console charset environment + variable. */ + dev_state->con_mbtowc = __mbtowc; + dev_state->con_wctomb = __wctomb; + strcpy (dev_state->con_charset, __locale_charset ()); } return &shared_console_info->tty_min_state; @@ -122,13 +141,10 @@ tty_list::get_tty (int n) return &nada; } -/* The results of GetConsoleCP() and GetConsoleOutputCP() cannot be - cached, because a program or the user can change these values at - any time. */ inline DWORD dev_console::con_to_str (char *d, int dlen, WCHAR w) { - return sys_wcstombs (d, dlen, &w, 1); + return sys_cp_wcstombs (con_wctomb, con_charset, d, dlen, &w, 1); } inline UINT @@ -138,9 +154,10 @@ dev_console::get_console_cp () } inline DWORD -dev_console::str_to_con (PWCHAR d, const char *s, DWORD sz) +dev_console::str_to_con (mbtowc_p f_mbtowc, char *charset, + PWCHAR d, const char *s, DWORD sz) { - return sys_cp_mbstowcs (get_console_cp (), d, CONVERT_LIMIT, s, sz); + return sys_cp_mbstowcs (f_mbtowc, charset, d, CONVERT_LIMIT, s, sz); } bool @@ -1423,12 +1440,17 @@ fhandler_console::write_normal (const unsigned char *src, size_t ret; mbstate_t ps; UINT cp = dev_state->get_console_cp (); - char charsetbuf[32]; - char *charset = __locale_charset (); - mbtowc_p f_mbtowc = __mbtowc; + char charsetbuf[ENCODING_LEN + 1]; + char *charset; + mbtowc_p f_mbtowc; if (cp) f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf); + else + { + f_mbtowc = dev_state->con_mbtowc; + charset = dev_state->con_charset; + } /* First check if we have cached lead bytes of a former try to write a truncated multibyte sequence. If so, process it. 
*/ @@ -1464,7 +1486,7 @@ fhandler_console::write_normal (const unsigned char *src, /* Valid multibyte sequence? Process. */ if (nfound) { - buf_len = dev_state->str_to_con (write_buf, + buf_len = dev_state->str_to_con (f_mbtowc, charset, write_buf, (const char *) trunc_buf.buf, nfound - trunc_buf.buf); WriteConsoleW (get_output_handle (), write_buf, buf_len, &done, 0); @@ -1504,7 +1526,8 @@ fhandler_console::write_normal (const unsigned char *src, if (found != src) { DWORD len = found - src; - buf_len = dev_state->str_to_con (write_buf, (const char *) src, len); + buf_len = dev_state->str_to_con (f_mbtowc, charset, write_buf, + (const char *) src, len); if (!buf_len) { debug_printf ("conversion error, handle %p", diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 6a1783f..9e48758 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -312,9 +312,8 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, Called from newlib's setlocale() with the current ANSI codepage, if the charset isn't given explicitely in the POSIX compatible locale specifier. The function also returns a pointer to the corresponding _mbtowc_r - function. This is used below in the sys_cp_mbstowcs function which - is called directly from fhandler_console if the "Alternate Charset" has - been switched on by an escape sequence. */ + function. Also called from fhandler_console::write_normal() if the + "Alternate Charset" has been switched on by an escape sequence. */ extern "C" mbtowc_p __set_charset_from_codepage (UINT cp, char *charset) { @@ -409,7 +408,8 @@ __set_charset_from_codepage (UINT cp, char *charset) If the result is truncated due to buffer size, it's a bug in Cygwin and the buffer in the calling function should be raised. 
*/ size_t __stdcall -sys_wcstombs (char *dst, size_t len, const PWCHAR src, size_t nwc) +sys_cp_wcstombs (wctomb_p f_wctomb, char *charset, char *dst, size_t len, + const PWCHAR src, size_t nwc) { char buf[10]; char *ptr = dst; @@ -427,13 +427,13 @@ sys_wcstombs (char *dst, size_t len, const PWCHAR src, size_t nwc) path names) is transform_chars in path.cc. */ if ((pw & 0xff00) == 0xf000) pw &= 0xff; - int bytes = _wctomb_r (_REENT, buf, pw, &ps); + int bytes = f_wctomb (_REENT, buf, pw, charset, &ps); /* Convert chars invalid in the current codepage to a sequence ASCII SO; UTF-8 representation of invalid char. */ - if (bytes == -1 && *__locale_charset () != 'U'/*TF-8*/) + if (bytes == -1 && *charset != 'U'/*TF-8*/) { buf[0] = 0x0e; /* ASCII SO */ - bytes = __utf8_wctomb (_REENT, buf + 1, pw, __locale_charset (), &ps); + bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps); if (bytes == -1) { ++pwcs; @@ -450,8 +450,7 @@ sys_wcstombs (char *dst, size_t len, const PWCHAR src, size_t nwc) ps.__count = 0; continue; } - bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, - __locale_charset (), &ps); + bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, charset, &ps); } } if (n + bytes <= len) @@ -514,7 +513,8 @@ sys_wcstombs_alloc (char **dst_p, int type, const PWCHAR src, size_t nwc) charset, which is the charset returned by GetConsoleCP (). Most of the time this is used for box and line drawing characters. 
*/ size_t __stdcall -sys_cp_mbstowcs (UINT cp, PWCHAR dst, size_t dlen, const char *src, size_t nms) +sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, PWCHAR dst, size_t dlen, + const char *src, size_t nms) { wchar_t *ptr = dst; char *pmbs = (char *) src; @@ -522,12 +522,6 @@ sys_cp_mbstowcs (UINT cp, PWCHAR dst, size_t dlen, const char *src, size_t nms) size_t len = dlen; int bytes; mbstate_t ps; - char charsetbuf[32]; - char *charset = __locale_charset (); - mbtowc_p f_mbtowc = __mbtowc; - - if (cp) - f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf); memset (&ps, 0, sizeof ps); if (dst == NULL) diff --git a/winsup/cygwin/wchar.h b/winsup/cygwin/wchar.h index 51e71bb..e6cccd4 100644 --- a/winsup/cygwin/wchar.h +++ b/winsup/cygwin/wchar.h @@ -13,6 +13,8 @@ details. */ #include_next +#define ENCODING_LEN 31 + #ifdef __cplusplus extern "C" { #endif @@ -21,16 +23,19 @@ typedef int mbtowc_f (struct _reent *, wchar_t *, const char *, size_t, const char *, mbstate_t *); typedef mbtowc_f *mbtowc_p; -extern int __utf8_wctomb (struct _reent *, char *, wchar_t, - const char *, mbstate_t *); - -extern int (*__mbtowc) (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); +extern mbtowc_p __mbtowc; extern mbtowc_f __ascii_mbtowc; extern mbtowc_f __utf8_mbtowc; extern mbtowc_f __iso_mbtowc; extern mbtowc_f __cp_mbtowc; +typedef int wctomb_f (struct _reent *, char *, wchar_t, const char *, + mbstate_t *); +typedef wctomb_f *wctomb_p; + +extern wctomb_p __wctomb; +extern wctomb_f __utf8_wctomb; + extern char *__locale_charset (); extern mbtowc_p __set_charset_from_codepage (unsigned int cp, char *charset); @@ -38,4 +43,31 @@ extern mbtowc_p __set_charset_from_codepage (unsigned int cp, char *charset); #ifdef __cplusplus } #endif + +size_t __stdcall sys_cp_wcstombs (wctomb_p, char *, char *, size_t, + const PWCHAR, size_t = (size_t) -1) + __attribute__ ((regparm(3))); +inline size_t +__stdcall sys_wcstombs (char *dst, size_t len, const 
PWCHAR src, + size_t nwc = (size_t) -1) +{ + return sys_cp_wcstombs (__wctomb, __locale_charset (), dst, len, src, nwc); +} +size_t __stdcall sys_wcstombs_alloc (char **, int, const PWCHAR, + size_t = (size_t) -1) + __attribute__ ((regparm(3))); + +size_t __stdcall sys_cp_mbstowcs (mbtowc_p, char *, PWCHAR, size_t, + const char *, size_t = (size_t) -1) + __attribute__ ((regparm(3))); +inline size_t +sys_mbstowcs (PWCHAR dst, size_t dlen, const char *src, + size_t nms = (size_t) -1) +{ + return sys_cp_mbstowcs (__mbtowc, __locale_charset (), dst, dlen, src, nms); +} +size_t __stdcall sys_mbstowcs_alloc (PWCHAR *, int, const char *, + size_t = (size_t) -1) + __attribute__ ((regparm(3))); + #endif /* _CYGWIN_WCHAR_H */ diff --git a/winsup/cygwin/winsup.h b/winsup/cygwin/winsup.h index 44f7caa..5ba9d0c 100644 --- a/winsup/cygwin/winsup.h +++ b/winsup/cygwin/winsup.h @@ -108,22 +108,6 @@ extern "C" DWORD WINAPI GetLastError (void); description see there. */ #define HEAP_NOTHEAP -1 -size_t __stdcall sys_wcstombs (char *, size_t, const PWCHAR, size_t = (size_t) -1) - __attribute__ ((regparm(3))); -size_t __stdcall sys_wcstombs_alloc (char **, int, const PWCHAR, size_t = (size_t) -1) - __attribute__ ((regparm(3))); - -size_t __stdcall sys_cp_mbstowcs (UINT, PWCHAR, size_t, const char *, size_t = (size_t) -1) - __attribute__ ((regparm(3))); -inline size_t -sys_mbstowcs (PWCHAR dst, size_t dlen, const char *src, - size_t nms = (size_t) -1) -{ - return sys_cp_mbstowcs (0, dst, dlen, src, nms); -} -size_t __stdcall sys_mbstowcs_alloc (PWCHAR *, int, const char *, size_t = (size_t) -1) - __attribute__ ((regparm(3))); - /* Used to check if Cygwin DLL is dynamically loaded. */ extern int cygserver_running; @@ -134,6 +118,8 @@ extern int cygserver_running; #include "debug.h" +#include + /**************************** Convenience ******************************/ /* Used to define status flag accessor methods */ -- cgit v1.1