diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2009-03-24 10:13:27 +0000 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2009-03-24 10:13:27 +0000 |
commit | 28186e81d947a830d9895cecc2d8e836a3cbccd0 (patch) | |
tree | 8ef79212f3476f0b1fbbef8fa46b97ea297c6fe3 /newlib/libc/locale | |
parent | 0258b687228f0d9d5191615ba0a13f7496f09d3b (diff) | |
download | newlib-28186e81d947a830d9895cecc2d8e836a3cbccd0.zip newlib-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.gz newlib-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.bz2 |
* libc/ctype/iswalpha.c: Handle all wchar_t as unicode on
_MB_CAPABLE systems.
* libc/ctype/iswblank.c: Ditto.
* libc/ctype/iswcntrl.c: Ditto.
* libc/ctype/iswprint.c: Ditto.
* libc/ctype/iswpunct.c: Ditto.
* libc/ctype/iswspace.c: Ditto.
* libc/ctype/jp2uc.c (__jp2uc): On Cygwin, just return c.
Explain why.
* libc/ctype/towlower.c: Ditto.
* libc/ctype/towupper.c: Ditto.
* libc/include/sys/config.h: Define _MB_EXTENDED_CHARSETS_ISO
and _MB_EXTENDED_CHARSETS_WINDOWS if _MB_EXTENDED_CHARSETS_ALL is
defined. Define _MB_EXTENDED_CHARSETS_ALL on Cygwin only for now.
* libc/include/sys/reent.h (struct _reent): Mark _current_category
and _current_locale as unused.
* libc/locale/locale.c: Add new charset support to documentation.
Include ../stdio/local.h from here.
(lc_ctype_charset): Set to "ASCII" by default.
(lc_message_charset): Ditto.
(_setlocale_r): Don't set _current_category and _current_locale.
(loadlocale): Add Cygwin codepage support. On _MB_CAPABLE
systems, set __mbtowc and __wctomb function pointers to function
corresponding with current charset. Don't allow non-existent
ISO-8859-12 charset. Add support for Windows singlebyte codepages.
On Cygwin, add support for GBK, CP949, and BIG5. On Cygwin,
call __set_ctype() in case the category is LC_CTYPE. Don't set
_current_category and _current_locale.
* libc/stdlib/Makefile.am (GENERAL_SOURCES): Add sb_charsets.c.
* libc/stdlib/Makefile.in: Regenerate.
* libc/stdlib/local.h: Add prototype for __locale_charset.
Add prototypes for __mbtowc and __wctomb pointers.
Add prototypes for charset-specific _wctomb_r and _mbtowc_r
functions.
Declare tables and functions from sb_charsets.c.
* libc/stdlib/mbtowc_r.c (__mbtowc): Define. Set to __ascii_mbtowc
by default.
(_mbtowc_r): Just call __mbtowc from here.
(__ascii_mbtowc): New function.
(__iso_mbtowc): New function.
(__cp_mbtowc): New function.
(__utf8_mbtowc): New function.
(__sjis_mbtowc): New function. Disable on Cygwin.
(__eucjp_mbtowc): New function. Disable on Cygwin.
(__jis_mbtowc): New function. Disable on Cygwin.
* libc/stdlib/sb_charsets.c: New file, adding singlebyte to UTF
conversion tables for all ISO and CP charsets.
(__iso_8859_index): New function.
(__cp_index): New function.
* libc/stdlib/wctomb_r.c (__wctomb): Define. Set to __ascii_wctomb
by default.
(_wctomb_r): Just call __wctomb from here.
(__ascii_wctomb): New function.
(__utf8_wctomb): New function.
(__sjis_wctomb): New function. Disable on Cygwin.
(__eucjp_wctomb): New function. Disable on Cygwin.
(__jis_wctomb): New function. Disable on Cygwin.
(__iso_wctomb): New function.
(__cp_wctomb): New function.
Diffstat (limited to 'newlib/libc/locale')
-rw-r--r-- | newlib/libc/locale/locale.c | 169 |
1 files changed, 152 insertions, 17 deletions
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c index a4cd30e..d3644eb 100644 --- a/newlib/libc/locale/locale.c +++ b/newlib/libc/locale/locale.c @@ -47,11 +47,18 @@ and <<"C">> values for <[locale]>; strings representing other locales are not honored unless _MB_CAPABLE is defined in which case POSIX locale strings are allowed, plus five extensions supported for backward compatibility with older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>, -<<"C-SJIS">>, or <<"C-ISO-8859-x">> with 1 <= x <= 15. Even when using -POSIX locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>, -<<"EUCJP">>, <<"SJIS">>, or <<"ISO-8859-x">> with 1 <= x <= 15. (<<"">> is -also accepted; if given, the settings are read from the corresponding -LC_* environment variables and $LANG according to POSIX rules. +<<"C-SJIS">>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with +xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, +1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. Even when using POSIX +locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>, +<<"EUCJP">>, <<"SJIS">>, <<"ISO-8859-x">> with 1 <= x <= 15, or +<<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, +874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. +(<<"">> is also accepted; if given, the settings are read from the +corresponding LC_* environment variables and $LANG according to POSIX rules. + +Under Cygwin, this implementation additionally supports the charsets <<"GBK">>, +<<"CP949">>, and <<"BIG5">>. If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a pointer to the string representing the current locale (always @@ -85,6 +92,9 @@ PORTABILITY ANSI C requires <<setlocale>>, but the only locale required across all implementations is the C locale. +NOTES +There is no ISO-8859-12 codepage. It's also refused by this implementation. 
+ No supporting OS subroutines are required. */ @@ -129,6 +139,11 @@ No supporting OS subroutines are required. #include <limits.h> #include <reent.h> #include <stdlib.h> +#include <wchar.h> +#include "../stdlib/local.h" +#ifdef __CYGWIN__ +#include <windows.h> +#endif #define _LC_LAST 7 #define ENCODING_LEN 31 @@ -190,8 +205,8 @@ static const char *__get_locale_env(struct _reent *, int); #endif -static char lc_ctype_charset[ENCODING_LEN + 1] = "ISO-8859-1"; -static char lc_message_charset[ENCODING_LEN + 1] = "ISO-8859-1"; +static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII"; +static char lc_message_charset[ENCODING_LEN + 1] = "ASCII"; char * _DEFUN(_setlocale_r, (p, category, locale), @@ -205,8 +220,6 @@ _DEFUN(_setlocale_r, (p, category, locale), if (strcmp (locale, "POSIX") && strcmp (locale, "C") && strcmp (locale, "")) return NULL; - p->_current_category = category; - p->_current_locale = locale; } return "C"; #else @@ -361,6 +374,11 @@ currentlocale() #endif #ifdef _MB_CAPABLE +#ifdef __CYGWIN__ +extern void *__set_charset_from_codepage (unsigned int, char *charset); +extern void __set_ctype (const char *charset); +#endif /* __CYGWIN__ */ + static char * loadlocale(struct _reent *p, int category) { @@ -382,7 +400,7 @@ loadlocale(struct _reent *p, int category) if (!strcmp (locale, "POSIX")) strcpy (locale, "C"); if (!strcmp (locale, "C")) /* Default "C" locale */ - strcpy (charset, "ISO-8859-1"); + strcpy (charset, "ASCII"); else if (locale[0] == 'C' && locale[1] == '-') /* Old newlib style */ strcpy (charset, locale + 2); else /* POSIX style */ @@ -414,7 +432,11 @@ loadlocale(struct _reent *p, int category) } else if (c[0] == '\0' || c[0] == '@') /* End of string or just a modifier */ +#ifdef __CYGWIN__ + __set_charset_from_codepage (GetACP (), charset); +#else strcpy (charset, "ISO-8859-1"); +#endif else /* Invalid string */ return NULL; @@ -426,42 +448,155 @@ loadlocale(struct _reent *p, int category) if (strcmp (charset, "UTF-8")) return NULL; 
mbc_max = 6; +#ifdef _MB_CAPABLE + __wctomb = __utf8_wctomb; + __mbtowc = __utf8_mbtowc; +#endif break; case 'J': if (strcmp (charset, "JIS")) return NULL; mbc_max = 8; +#ifdef _MB_CAPABLE + __wctomb = __jis_wctomb; + __mbtowc = __jis_mbtowc; +#endif break; case 'E': - if (strcmp (charset, "EUCJP")) + if (strcmp (charset, "EUCJP") && strcmp (charset, "eucJP")) return NULL; + strcpy (charset, "EUCJP"); mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __eucjp_wctomb; + __mbtowc = __eucjp_mbtowc; +#endif break; case 'S': if (strcmp (charset, "SJIS")) return NULL; mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __sjis_wctomb; + __mbtowc = __sjis_mbtowc; +#endif break; case 'I': - default: - /* Must be exactly one of ISO-8859-1, [...] ISO-8859-15. */ + /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for + ISO-8859-12. */ if (strncmp (charset, "ISO-8859-", 9)) return NULL; - val = strtol (charset + 9, &end, 10); - if (val < 1 || val > 15 || *end) + val = _strtol_r (p, charset + 9, &end, 10); + if (val < 1 || val > 16 || val == 12 || *end) return NULL; mbc_max = 1; +#ifdef _MB_CAPABLE +#ifdef _MB_EXTENDED_CHARSETS_ISO + __wctomb = __iso_wctomb; + __mbtowc = __iso_mbtowc; +#else /* !_MB_EXTENDED_CHARSETS_ISO */ + __wctomb = __ascii_wctomb; + __mbtowc = __ascii_mbtowc; +#endif /* _MB_EXTENDED_CHARSETS_ISO */ +#endif + break; + case 'C': + if (charset[1] != 'P') + return NULL; + val = _strtol_r (p, charset + 2, &end, 10); + if (*end) + return NULL; + switch (val) + { + case 437: + case 720: + case 737: + case 775: + case 850: + case 852: + case 855: + case 857: + case 858: + case 862: + case 866: + case 874: + case 1125: + case 1250: + case 1251: + case 1252: + case 1253: + case 1254: + case 1255: + case 1256: + case 1257: + case 1258: + mbc_max = 1; +#ifdef _MB_CAPABLE +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS + __wctomb = __cp_wctomb; + __mbtowc = __cp_mbtowc; +#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ + __wctomb = __ascii_wctomb; + __mbtowc = __ascii_mbtowc; 
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ +#endif + break; +#ifdef __CYGWIN__ + case 949: + mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __kr_wctomb; + __mbtowc = __kr_mbtowc; +#endif + break; +#endif + default: + return NULL; + } + break; + case 'A': + if (strcmp (charset, "ASCII")) + return NULL; + mbc_max = 1; +#ifdef _MB_CAPABLE + __wctomb = __ascii_wctomb; + __mbtowc = __ascii_mbtowc; +#endif break; +#ifdef __CYGWIN__ + case 'G': + if (strcmp (charset, "GBK")) + return NULL; + mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __gbk_wctomb; + __mbtowc = __gbk_mbtowc; +#endif + break; + case 'B': + if (strcmp (charset, "BIG5") && strcmp (charset, "Big5")) + return NULL; + strcpy (charset, "BIG5"); + mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __big5_wctomb; + __mbtowc = __big5_mbtowc; +#endif + break; +#endif /* __CYGWIN__ */ + default: + return NULL; } if (category == LC_CTYPE) { strcpy (lc_ctype_charset, charset); __mb_cur_max = mbc_max; +#ifdef __CYGWIN__ + __set_ctype (charset); +#endif } else if (category == LC_MESSAGES) strcpy (lc_message_charset, charset); - p->_current_category = category; - p->_current_locale = locale; return strcpy(current_categories[category], new_categories[category]); } |