diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2009-03-24 10:13:27 +0000 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2009-03-24 10:13:27 +0000 |
commit | 28186e81d947a830d9895cecc2d8e836a3cbccd0 (patch) | |
tree | 8ef79212f3476f0b1fbbef8fa46b97ea297c6fe3 | |
parent | 0258b687228f0d9d5191615ba0a13f7496f09d3b (diff) | |
download | newlib-28186e81d947a830d9895cecc2d8e836a3cbccd0.zip newlib-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.gz newlib-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.bz2 |
* libc/ctype/iswalpha.c: Handle all wchar_t as unicode on
_MB_CAPABLE systems.
* libc/ctype/iswblank.c: Ditto.
* libc/ctype/iswcntrl.c: Ditto.
* libc/ctype/iswprint.c: Ditto.
* libc/ctype/iswpunct.c: Ditto.
* libc/ctype/iswspace.c: Ditto.
* libc/ctype/jp2uc.c (__jp2uc): On Cygwin, just return c.
Explain why.
* libc/ctype/towlower.c: Ditto.
* libc/ctype/towupper.c: Ditto.
* libc/include/sys/config.h: Define _MB_EXTENDED_CHARSETS_ISO
and _MB_EXTENDED_CHARSETS_WINDOWS if _MB_EXTENDED_CHARSETS_ALL is
defined. Define _MB_EXTENDED_CHARSETS_ALL on Cygwin only for now.
* libc/include/sys/reent.h (struct _reent): Mark _current_category
and _current_locale as unused.
* libc/locale/locale.c: Add new charset support to documentation.
Include ../stdio/local.h from here.
(lc_ctype_charset): Set to "ASCII" by default.
(lc_message_charset): Ditto.
(_setlocale_r): Don't set _current_category and _current_locale.
(loadlocale): Add Cygwin codepage support. On _MB_CAPABLE
systems, set __mbtowc and __wctomb function pointers to function
corresponding with current charset. Don't allow non-existent
ISO-8859-12 charset. Add support for Windows singlebyte codepages.
On Cygwin, add support for GBK, CP949, and BIG5. On Cygwin,
call __set_ctype() in case the category is LC_CTYPE. Don't set
_current_category and _current_locale.
* libc/stdlib/Makefile.am (GENERAL_SOURCES): Add sb_charsets.c.
* libc/stdlib/Makefile.in: Regenerate.
* libc/stdlib/local.h: Add prototype for __locale_charset.
Add prototypes for __mbtowc and __wctomb pointers.
Add prototypes for charset-specific _wctomb_r and _mbtowc_r
functions.
Declare tables and functions from sb_charsets.c.
* libc/stdlib/mbtowc_r.c (__mbtowc): Define. Set to __ascii_mbtowc
by default.
(_mbtowc_r): Just call __mbtowc from here.
(__ascii_mbtowc): New function.
(__iso_mbtowc): New function.
(__cp_mbtowc): New function.
(__utf8_mbtowc): New function.
(__sjis_mbtowc): New function. Disable on Cygwin.
(__eucjp_mbtowc): New function. Disable on Cygwin.
(__jis_mbtowc): New function. Disable on Cygwin.
* libc/stdlib/sb_charsets.c: New file, adding singlebyte to UTF
conversion tables for all ISO and CP charsets.
(__iso_8859_index): New function.
(__cp_index): New function.
* libc/stdlib/wctomb_r.c (__wctomb): Define. Set to __ascii_wctomb
by default.
(_wctomb_r): Just call __wctomb from here.
(__ascii_wctomb): New function.
(__utf8_wctomb): New function.
(__sjis_wctomb): New function. Disable on Cygwin.
(__eucjp_wctomb): New function. Disable on Cygwin.
(__jis_wctomb): New function. Disable on Cygwin.
(__iso_wctomb): New function.
(__cp_wctomb): New function.
-rw-r--r-- | newlib/ChangeLog | 62 | ||||
-rw-r--r-- | newlib/libc/ctype/iswalpha.c | 485 | ||||
-rw-r--r-- | newlib/libc/ctype/iswblank.c | 37 | ||||
-rw-r--r-- | newlib/libc/ctype/iswcntrl.c | 35 | ||||
-rw-r--r-- | newlib/libc/ctype/iswprint.c | 597 | ||||
-rw-r--r-- | newlib/libc/ctype/iswpunct.c | 465 | ||||
-rw-r--r-- | newlib/libc/ctype/iswspace.c | 39 | ||||
-rw-r--r-- | newlib/libc/ctype/jp2uc.c | 6 | ||||
-rw-r--r-- | newlib/libc/ctype/towlower.c | 721 | ||||
-rw-r--r-- | newlib/libc/ctype/towupper.c | 783 | ||||
-rw-r--r-- | newlib/libc/include/sys/config.h | 9 | ||||
-rw-r--r-- | newlib/libc/include/sys/reent.h | 4 | ||||
-rw-r--r-- | newlib/libc/locale/locale.c | 169 | ||||
-rw-r--r-- | newlib/libc/stdlib/Makefile.am | 1 | ||||
-rw-r--r-- | newlib/libc/stdlib/Makefile.in | 48 | ||||
-rw-r--r-- | newlib/libc/stdlib/gdtoa-gethex.c | 27 | ||||
-rw-r--r-- | newlib/libc/stdlib/local.h | 57 | ||||
-rw-r--r-- | newlib/libc/stdlib/mbtowc_r.c | 819 | ||||
-rw-r--r-- | newlib/libc/stdlib/sb_charsets.c | 697 | ||||
-rw-r--r-- | newlib/libc/stdlib/strtod.c | 10 | ||||
-rw-r--r-- | newlib/libc/stdlib/wcstod.c | 22 | ||||
-rw-r--r-- | newlib/libc/stdlib/wctomb_r.c | 464 |
22 files changed, 3361 insertions, 2196 deletions
diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 846d798..3a734d9 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,65 @@ +2009-03-24 Corinna Vinschen <corinna@vinschen.de> + + * libc/ctype/iswalpha.c: Handle all wchar_t as unicode on + _MB_CAPABLE systems. + * libc/ctype/iswblank.c: Ditto. + * libc/ctype/iswcntrl.c: Ditto. + * libc/ctype/iswprint.c: Ditto. + * libc/ctype/iswpunct.c: Ditto. + * libc/ctype/iswspace.c: Ditto. + * libc/ctype/jp2uc.c (__jp2uc): On Cygwin, just return c. + Explain why. + * libc/ctype/towlower.c: Ditto. + * libc/ctype/towupper.c: Ditto. + * libc/include/sys/config.h: Define _MB_EXTENDED_CHARSETS_ISO + and _MB_EXTENDED_CHARSETS_WINDOWS if _MB_EXTENDED_CHARSETS_ALL is + defined. Define _MB_EXTENDED_CHARSETS_ALL on Cygwin only for now. + * libc/include/sys/reent.h (struct _reent): Mark _current_category + and _current_locale as unused. + * libc/locale/locale.c: Add new charset support to documentation. + Include ../stdio/local.h from here. + (lc_ctype_charset): Set to "ASCII" by default. + (lc_message_charset): Ditto. + (_setlocale_r): Don't set _current_category and _current_locale. + (loadlocale): Add Cygwin codepage support. On _MB_CAPABLE + systems, set __mbtowc and __wctomb function pointers to function + corresponding with current charset. Don't allow non-existant + ISO-8859-12 charset. Add support for Windows singlebyte codepages. + On Cygwin, add support for GBK, CP949, and BIG5. On Cygwin, + call __set_ctype() in case the catorgy is LC_CTYPE. Don't set + _current_category and _current_locale. + * libc/stdlib/Makefile.am (GENERAL_SOURCES): Add sb_charsets.c. + * libc/stdlib/Makefile.in: Regenerate. + * libc/stdlib/local.h: Add prototype for __locale_charset. + Add prototypes for __mbtowc and __wctomb pointers. + Add prototypes for charset-specific _wctomb_r and _mbtowc_r + functions. + Declare tables and functions from sb_charsets.c. + * libc/stdlib/mbtowc_r.c (__mbtowc): Define. 
Set to __ascii_mbtowc + by default. + (_mbtowc_r): Just call __mbtowc from here. + (__ascii_mbtowc): New function. + (__iso_mbtowc): New function. + (__cp_mbtowc): New function. + (__utf8_mbtowc): New function. + (__sjis_mbtowc): New function. Disable on Cygwin. + (__eucjp_mbtowc): New function. Disable on Cygwin. + (__jis_mbtowc): New function. Disable on Cygwin. + * libc/stdlib/sb_charsets.c: New file, adding singlebyte to UTF + conversion tables for all ISO and CP charsets. + (__iso_8859_index): New function. + (__cp_index): New function. + * libc/stdlib/wctomb_r.c (__wctomb): Define. Set to __ascii_wctomb + by default. + (_wctomb_r): Just call __wctomb from here. + (__ascii_wctomb): New function. + (__utf8_wctomb): New function. + (__sjis_wctomb): New function. Disable on Cygwin. + (__eucjp_wctomb): New function. Disable on Cygwin. + (__jis_wctomb): New function. Disable on Cygwin. + (__iso_wctomb): New function. + (__cp_wctomb): New function. + 2009-03-23 Richard Earnshaw <rearnsha@arm.com> * libc/machine/arm/strcmp.c (strcmp): Treat char as unsigned. 
diff --git a/newlib/libc/ctype/iswalpha.c b/newlib/libc/ctype/iswalpha.c index a2c9cf9..4e2ad6b 100644 --- a/newlib/libc/ctype/iswalpha.c +++ b/newlib/libc/ctype/iswalpha.c @@ -70,270 +70,253 @@ int _DEFUN(iswalpha,(c), wint_t c) { #ifdef _MB_CAPABLE - int unicode = 0; + unsigned const char *table; + unsigned char *ptr; + unsigned char ctmp; + int size; + wint_t x; if (!strcmp (__locale_charset (), "JIS")) - { - c = __jp2uc (c, JP_JIS); - unicode = 1; - } + c = __jp2uc (c, JP_JIS); else if (!strcmp (__locale_charset (), "SJIS")) - { - c = __jp2uc (c, JP_SJIS); - unicode = 1; - } + c = __jp2uc (c, JP_SJIS); else if (!strcmp (__locale_charset (), "EUCJP")) + c = __jp2uc (c, JP_EUCJP); + + x = (c >> 8); + /* for some large sections, all characters are alphabetic so handle them here */ + if ((x >= 0x34 && x <= 0x4c) || + (x >= 0x4e && x <= 0x9e) || + (x >= 0xac && x <= 0xd6) || + (x >= 0x200 && x <= 0x2a5)) + return 1; + + switch (x) { - c = __jp2uc (c, JP_EUCJP); - unicode = 1; - } - else if (!strcmp (__locale_charset (), "UTF-8")) - { - unicode = 1; + case 0x00: + table = u0; + size = sizeof(u0); + break; + case 0x01: + case 0x15: + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xf9: + case 0xfc: + case 0x2f8: + case 0x2f9: + return 1; + case 0x02: + table = u2; + size = sizeof(u2); + break; + case 0x03: + table = u3; + size = sizeof(u3); + break; + case 0x04: + table = u4; + size = sizeof(u4); + break; + case 0x05: + table = u5; + size = sizeof(u5); + break; + case 0x06: + table = u6; + size = sizeof(u6); + break; + case 0x07: + table = u7; + size = sizeof(u7); + break; + case 0x09: + table = u9; + size = sizeof(u9); + break; + case 0x0a: + table = ua; + size = sizeof(ua); + break; + case 0x0b: + table = ub; + size = sizeof(ub); + break; + case 0x0c: + table = uc; + size = sizeof(uc); + break; + case 0x0d: + table = ud; + size = sizeof(ud); + break; + case 0x0e: + table = ue; + size = sizeof(ue); + break; + case 0x0f: + table = uf; + size = sizeof(uf); + 
break; + case 0x10: + table = u10; + size = sizeof(u10); + break; + case 0x11: + table = u11; + size = sizeof(u11); + break; + case 0x12: + table = u12; + size = sizeof(u12); + break; + case 0x13: + table = u13; + size = sizeof(u13); + break; + case 0x14: + table = u14; + size = sizeof(u14); + break; + case 0x16: + table = u16; + size = sizeof(u16); + break; + case 0x17: + table = u17; + size = sizeof(u17); + break; + case 0x18: + table = u18; + size = sizeof(u18); + break; + case 0x1e: + table = u1e; + size = sizeof(u1e); + break; + case 0x1f: + table = u1f; + size = sizeof(u1f); + break; + case 0x20: + table = u20; + size = sizeof(u20); + break; + case 0x21: + table = u21; + size = sizeof(u21); + break; + case 0x24: + table = u24; + size = sizeof(u24); + break; + case 0x30: + table = u30; + size = sizeof(u30); + break; + case 0x31: + table = u31; + size = sizeof(u31); + break; + case 0x4d: + table = u4d; + size = sizeof(u4d); + break; + case 0x9f: + table = u9f; + size = sizeof(u9f); + break; + case 0xa4: + table = ua4; + size = sizeof(ua4); + break; + case 0xd7: + table = ud7; + size = sizeof(ud7); + break; + case 0xfa: + table = ufa; + size = sizeof(ufa); + break; + case 0xfb: + table = ufb; + size = sizeof(ufb); + break; + case 0xfd: + table = ufd; + size = sizeof(ufd); + break; + case 0xfe: + table = ufe; + size = sizeof(ufe); + break; + case 0xff: + table = uff; + size = sizeof(uff); + break; + case 0x103: + table = u103; + size = sizeof(u103); + break; + case 0x104: + table = u104; + size = sizeof(u104); + break; + case 0x1d4: + table = u1d4; + size = sizeof(u1d4); + break; + case 0x1d5: + table = u1d5; + size = sizeof(u1d5); + break; + case 0x1d6: + table = u1d6; + size = sizeof(u1d6); + break; + case 0x1d7: + table = u1d7; + size = sizeof(u1d7); + break; + case 0x2a6: + table = u2a6; + size = sizeof(u2a6); + break; + case 0x2fa: + table = u2fa; + size = sizeof(u2fa); + break; + default: + return 0; } - - if (unicode) + /* we have narrowed down to a 
section of 256 characters to check */ + /* now check if c matches the alphabetic wide-chars within that section */ + ptr = (unsigned char *)table; + ctmp = (unsigned char)c; + while (ptr < table + size) { - unsigned const char *table; - unsigned char *ptr; - unsigned char ctmp; - int size; - wint_t x = (c >> 8); - - /* for some large sections, all characters are alphabetic so handle them here */ - if ((x >= 0x34 && x <= 0x4c) || - (x >= 0x4e && x <= 0x9e) || - (x >= 0xac && x <= 0xd6) || - (x >= 0x200 && x <= 0x2a5)) + if (ctmp == *ptr) return 1; - - switch (x) - { - case 0x00: - table = u0; - size = sizeof(u0); - break; - case 0x01: - case 0x15: - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: - case 0xf9: - case 0xfc: - case 0x2f8: - case 0x2f9: - return 1; - case 0x02: - table = u2; - size = sizeof(u2); - break; - case 0x03: - table = u3; - size = sizeof(u3); - break; - case 0x04: - table = u4; - size = sizeof(u4); - break; - case 0x05: - table = u5; - size = sizeof(u5); - break; - case 0x06: - table = u6; - size = sizeof(u6); - break; - case 0x07: - table = u7; - size = sizeof(u7); - break; - case 0x09: - table = u9; - size = sizeof(u9); - break; - case 0x0a: - table = ua; - size = sizeof(ua); - break; - case 0x0b: - table = ub; - size = sizeof(ub); - break; - case 0x0c: - table = uc; - size = sizeof(uc); - break; - case 0x0d: - table = ud; - size = sizeof(ud); - break; - case 0x0e: - table = ue; - size = sizeof(ue); - break; - case 0x0f: - table = uf; - size = sizeof(uf); - break; - case 0x10: - table = u10; - size = sizeof(u10); - break; - case 0x11: - table = u11; - size = sizeof(u11); - break; - case 0x12: - table = u12; - size = sizeof(u12); - break; - case 0x13: - table = u13; - size = sizeof(u13); - break; - case 0x14: - table = u14; - size = sizeof(u14); - break; - case 0x16: - table = u16; - size = sizeof(u16); - break; - case 0x17: - table = u17; - size = sizeof(u17); - break; - case 0x18: - table = u18; - size = sizeof(u18); - break; - case 0x1e: 
- table = u1e; - size = sizeof(u1e); - break; - case 0x1f: - table = u1f; - size = sizeof(u1f); - break; - case 0x20: - table = u20; - size = sizeof(u20); - break; - case 0x21: - table = u21; - size = sizeof(u21); - break; - case 0x24: - table = u24; - size = sizeof(u24); - break; - case 0x30: - table = u30; - size = sizeof(u30); - break; - case 0x31: - table = u31; - size = sizeof(u31); - break; - case 0x4d: - table = u4d; - size = sizeof(u4d); - break; - case 0x9f: - table = u9f; - size = sizeof(u9f); - break; - case 0xa4: - table = ua4; - size = sizeof(ua4); - break; - case 0xd7: - table = ud7; - size = sizeof(ud7); - break; - case 0xfa: - table = ufa; - size = sizeof(ufa); - break; - case 0xfb: - table = ufb; - size = sizeof(ufb); - break; - case 0xfd: - table = ufd; - size = sizeof(ufd); - break; - case 0xfe: - table = ufe; - size = sizeof(ufe); - break; - case 0xff: - table = uff; - size = sizeof(uff); - break; - case 0x103: - table = u103; - size = sizeof(u103); - break; - case 0x104: - table = u104; - size = sizeof(u104); - break; - case 0x1d4: - table = u1d4; - size = sizeof(u1d4); - break; - case 0x1d5: - table = u1d5; - size = sizeof(u1d5); - break; - case 0x1d6: - table = u1d6; - size = sizeof(u1d6); - break; - case 0x1d7: - table = u1d7; - size = sizeof(u1d7); - break; - case 0x2a6: - table = u2a6; - size = sizeof(u2a6); - break; - case 0x2fa: - table = u2fa; - size = sizeof(u2fa); - break; - default: - return 0; - } - /* we have narrowed down to a section of 256 characters to check */ - /* now check if c matches the alphabetic wide-chars within that section */ - ptr = (unsigned char *)table; - ctmp = (unsigned char)c; - while (ptr < table + size) + if (ctmp < *ptr) + return 0; + /* otherwise c > *ptr */ + /* look for 0x0 as next element which indicates a range */ + ++ptr; + if (*ptr == 0x0) { - if (ctmp == *ptr) + /* we have a range..see if c falls within range */ + ++ptr; + if (ctmp <= *ptr) return 1; - if (ctmp < *ptr) - return 0; - /* otherwise c > 
*ptr */ - /* look for 0x0 as next element which indicates a range */ ++ptr; - if (*ptr == 0x0) - { - /* we have a range..see if c falls within range */ - ++ptr; - if (ctmp <= *ptr) - return 1; - ++ptr; - } } - /* not in table */ - return 0; } -#endif /* _MB_CAPABLE */ - + /* not in table */ + return 0; +#else return (c < (wint_t)0x100 ? isalpha (c) : 0); +#endif /* _MB_CAPABLE */ } diff --git a/newlib/libc/ctype/iswblank.c b/newlib/libc/ctype/iswblank.c index 4db8ae0..e0601e9 100644 --- a/newlib/libc/ctype/iswblank.c +++ b/newlib/libc/ctype/iswblank.c @@ -66,37 +66,18 @@ int _DEFUN(iswblank,(c), wint_t c) { #ifdef _MB_CAPABLE - int unicode = 0; - if (!strcmp (__locale_charset (), "JIS")) - { - c = __jp2uc (c, JP_JIS); - unicode = 1; - } + c = __jp2uc (c, JP_JIS); else if (!strcmp (__locale_charset (), "SJIS")) - { - c = __jp2uc (c, JP_SJIS); - unicode = 1; - } + c = __jp2uc (c, JP_SJIS); else if (!strcmp (__locale_charset (), "EUCJP")) - { - c = __jp2uc (c, JP_EUCJP); - unicode = 1; - } - else if (!strcmp (__locale_charset (), "UTF-8")) - { - unicode = 1; - } - - if (unicode) - { - return (c == 0x0009 || c == 0x0020 || c == 0x1680 || - (c >= 0x2000 && c <= 0x2006) || - (c >= 0x2008 && c <= 0x200b) || - c == 0x205f || c == 0x3000); - } -#endif /* _MB_CAPABLE */ - + c = __jp2uc (c, JP_EUCJP); + return (c == 0x0009 || c == 0x0020 || c == 0x1680 || + (c >= 0x2000 && c <= 0x2006) || + (c >= 0x2008 && c <= 0x200b) || + c == 0x205f || c == 0x3000); +#else return (c < 0x100 ? 
isblank (c) : 0); +#endif /* _MB_CAPABLE */ } diff --git a/newlib/libc/ctype/iswcntrl.c b/newlib/libc/ctype/iswcntrl.c index b9f9460..2d8a1dd 100644 --- a/newlib/libc/ctype/iswcntrl.c +++ b/newlib/libc/ctype/iswcntrl.c @@ -66,36 +66,17 @@ int _DEFUN(iswcntrl,(c), wint_t c) { #ifdef _MB_CAPABLE - int unicode = 0; - if (!strcmp (__locale_charset (), "JIS")) - { - c = __jp2uc (c, JP_JIS); - unicode = 1; - } + c = __jp2uc (c, JP_JIS); else if (!strcmp (__locale_charset (), "SJIS")) - { - c = __jp2uc (c, JP_SJIS); - unicode = 1; - } + c = __jp2uc (c, JP_SJIS); else if (!strcmp (__locale_charset (), "EUCJP")) - { - c = __jp2uc (c, JP_EUCJP); - unicode = 1; - } - else if (!strcmp (__locale_charset (), "UTF-8")) - { - unicode = 1; - } - - if (unicode) - { - return ((c >= 0x0000 && c <= 0x001f) || - (c >= 0x007f && c <= 0x009f) || - c == 0x2028 || c == 0x2029); - } -#endif /* _MB_CAPABLE */ - + c = __jp2uc (c, JP_EUCJP); + return ((c >= 0x0000 && c <= 0x001f) || + (c >= 0x007f && c <= 0x009f) || + c == 0x2028 || c == 0x2029); +#else return (c < 0x100 ? 
iscntrl (c) : 0); +#endif /* _MB_CAPABLE */ } diff --git a/newlib/libc/ctype/iswprint.c b/newlib/libc/ctype/iswprint.c index 814a26b..a632aa4 100644 --- a/newlib/libc/ctype/iswprint.c +++ b/newlib/libc/ctype/iswprint.c @@ -70,325 +70,308 @@ int _DEFUN(iswprint,(c), wint_t c) { #ifdef _MB_CAPABLE - int unicode = 0; - + unsigned const char *table; + unsigned char *ptr; + unsigned char ctmp; + int size; + wint_t x; + if (!strcmp (__locale_charset (), "JIS")) - { - c = __jp2uc (c, JP_JIS); - unicode = 1; - } + c = __jp2uc (c, JP_JIS); else if (!strcmp (__locale_charset (), "SJIS")) - { - c = __jp2uc (c, JP_SJIS); - unicode = 1; - } + c = __jp2uc (c, JP_SJIS); else if (!strcmp (__locale_charset (), "EUCJP")) + c = __jp2uc (c, JP_EUCJP); + + x = (c >> 8); + /* for some large sections, all characters are printuation so handle them here */ + if ((x >= 0x34 && x <= 0x4c) || + (x >= 0x4e && x <= 0x9e) || + (x >= 0xac && x <= 0xd6) || + (x >= 0xe0 && x <= 0xf9) || + (x >= 0x200 && x <= 0x2a5) || + (x >= 0xf00 && x <= 0xffe) || + (x >= 0x1000 && x <= 0x10fe)) + return 1; + + switch (x) { - c = __jp2uc (c, JP_EUCJP); - unicode = 1; + case 0x01: + case 0x15: + case 0x22: + case 0x25: + case 0x28: + case 0x29: + case 0x2a: + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xfc: + case 0x2f8: + case 0x2f9: + return 1; + case 0x00: + table = u0; + size = sizeof(u0); + break; + case 0x02: + table = u2; + size = sizeof(u2); + break; + case 0x03: + table = u3; + size = sizeof(u3); + break; + case 0x04: + table = u4; + size = sizeof(u4); + break; + case 0x05: + table = u5; + size = sizeof(u5); + break; + case 0x06: + table = u6; + size = sizeof(u6); + break; + case 0x07: + table = u7; + size = sizeof(u7); + break; + case 0x09: + table = u9; + size = sizeof(u9); + break; + case 0x0a: + table = ua; + size = sizeof(ua); + break; + case 0x0b: + table = ub; + size = sizeof(ub); + break; + case 0x0c: + table = uc; + size = sizeof(uc); + break; + case 0x0d: + table = ud; + size = 
sizeof(ud); + break; + case 0x0e: + table = ue; + size = sizeof(ue); + break; + case 0x0f: + table = uf; + size = sizeof(uf); + break; + case 0x10: + table = u10; + size = sizeof(u10); + break; + case 0x11: + table = u11; + size = sizeof(u11); + break; + case 0x12: + table = u12; + size = sizeof(u12); + break; + case 0x13: + table = u13; + size = sizeof(u13); + break; + case 0x14: + table = u14; + size = sizeof(u14); + break; + case 0x16: + table = u16; + size = sizeof(u16); + break; + case 0x17: + table = u17; + size = sizeof(u17); + break; + case 0x18: + table = u18; + size = sizeof(u18); + break; + case 0x1e: + table = u1e; + size = sizeof(u1e); + break; + case 0x1f: + table = u1f; + size = sizeof(u1f); + break; + case 0x20: + table = u20; + size = sizeof(u20); + break; + case 0x21: + table = u21; + size = sizeof(u21); + break; + case 0x23: + table = u23; + size = sizeof(u23); + break; + case 0x24: + table = u24; + size = sizeof(u24); + break; + case 0x26: + table = u26; + size = sizeof(u26); + break; + case 0x27: + table = u27; + size = sizeof(u27); + break; + case 0x2e: + table = u2e; + size = sizeof(u2e); + break; + case 0x2f: + table = u2f; + size = sizeof(u2f); + break; + case 0x30: + table = u30; + size = sizeof(u30); + break; + case 0x31: + table = u31; + size = sizeof(u31); + break; + case 0x32: + table = u32; + size = sizeof(u32); + break; + case 0x33: + table = u33; + size = sizeof(u33); + break; + case 0x4d: + table = u4d; + size = sizeof(u4d); + break; + case 0x9f: + table = u9f; + size = sizeof(u9f); + break; + case 0xa4: + table = ua4; + size = sizeof(ua4); + break; + case 0xd7: + table = ud7; + size = sizeof(ud7); + break; + case 0xfa: + table = ufa; + size = sizeof(ufa); + break; + case 0xfb: + table = ufb; + size = sizeof(ufb); + break; + case 0xfd: + table = ufd; + size = sizeof(ufd); + break; + case 0xfe: + table = ufe; + size = sizeof(ufe); + break; + case 0xff: + table = uff; + size = sizeof(uff); + break; + case 0x103: + table = u103; + 
size = sizeof(u103); + break; + case 0x104: + table = u104; + size = sizeof(u104); + break; + case 0x1d0: + table = u1d0; + size = sizeof(u1d0); + break; + case 0x1d1: + table = u1d1; + size = sizeof(u1d1); + break; + case 0x1d4: + table = u1d4; + size = sizeof(u1d4); + break; + case 0x1d5: + table = u1d5; + size = sizeof(u1d5); + break; + case 0x1d6: + table = u1d6; + size = sizeof(u1d6); + break; + case 0x1d7: + table = u1d7; + size = sizeof(u1d7); + break; + case 0x2a6: + table = u2a6; + size = sizeof(u2a6); + break; + case 0x2fa: + table = u2fa; + size = sizeof(u2fa); + break; + case 0xe00: + table = ue00; + size = sizeof(ue00); + break; + case 0xfff: + table = ufff; + size = sizeof(ufff); + break; + case 0x10ff: + table = u10ff; + size = sizeof(u10ff); + break; + default: + return 0; } - else if (!strcmp (__locale_charset (), "UTF-8")) + /* we have narrowed down to a section of 256 characters to check */ + /* now check if c matches the printuation wide-chars within that section */ + ptr = (unsigned char *)table; + ctmp = (unsigned char)c; + while (ptr < table + size) { - unicode = 1; - } - - if (unicode) - { - unsigned const char *table; - unsigned char *ptr; - unsigned char ctmp; - int size; - wint_t x = (c >> 8); - - /* for some large sections, all characters are printuation so handle them here */ - if ((x >= 0x34 && x <= 0x4c) || - (x >= 0x4e && x <= 0x9e) || - (x >= 0xac && x <= 0xd6) || - (x >= 0xe0 && x <= 0xf9) || - (x >= 0x200 && x <= 0x2a5) || - (x >= 0xf00 && x <= 0xffe) || - (x >= 0x1000 && x <= 0x10fe)) + if (ctmp == *ptr) return 1; - - switch (x) - { - case 0x01: - case 0x15: - case 0x22: - case 0x25: - case 0x28: - case 0x29: - case 0x2a: - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: - case 0xfc: - case 0x2f8: - case 0x2f9: - return 1; - case 0x00: - table = u0; - size = sizeof(u0); - break; - case 0x02: - table = u2; - size = sizeof(u2); - break; - case 0x03: - table = u3; - size = sizeof(u3); - break; - case 0x04: - table = u4; - size = 
sizeof(u4); - break; - case 0x05: - table = u5; - size = sizeof(u5); - break; - case 0x06: - table = u6; - size = sizeof(u6); - break; - case 0x07: - table = u7; - size = sizeof(u7); - break; - case 0x09: - table = u9; - size = sizeof(u9); - break; - case 0x0a: - table = ua; - size = sizeof(ua); - break; - case 0x0b: - table = ub; - size = sizeof(ub); - break; - case 0x0c: - table = uc; - size = sizeof(uc); - break; - case 0x0d: - table = ud; - size = sizeof(ud); - break; - case 0x0e: - table = ue; - size = sizeof(ue); - break; - case 0x0f: - table = uf; - size = sizeof(uf); - break; - case 0x10: - table = u10; - size = sizeof(u10); - break; - case 0x11: - table = u11; - size = sizeof(u11); - break; - case 0x12: - table = u12; - size = sizeof(u12); - break; - case 0x13: - table = u13; - size = sizeof(u13); - break; - case 0x14: - table = u14; - size = sizeof(u14); - break; - case 0x16: - table = u16; - size = sizeof(u16); - break; - case 0x17: - table = u17; - size = sizeof(u17); - break; - case 0x18: - table = u18; - size = sizeof(u18); - break; - case 0x1e: - table = u1e; - size = sizeof(u1e); - break; - case 0x1f: - table = u1f; - size = sizeof(u1f); - break; - case 0x20: - table = u20; - size = sizeof(u20); - break; - case 0x21: - table = u21; - size = sizeof(u21); - break; - case 0x23: - table = u23; - size = sizeof(u23); - break; - case 0x24: - table = u24; - size = sizeof(u24); - break; - case 0x26: - table = u26; - size = sizeof(u26); - break; - case 0x27: - table = u27; - size = sizeof(u27); - break; - case 0x2e: - table = u2e; - size = sizeof(u2e); - break; - case 0x2f: - table = u2f; - size = sizeof(u2f); - break; - case 0x30: - table = u30; - size = sizeof(u30); - break; - case 0x31: - table = u31; - size = sizeof(u31); - break; - case 0x32: - table = u32; - size = sizeof(u32); - break; - case 0x33: - table = u33; - size = sizeof(u33); - break; - case 0x4d: - table = u4d; - size = sizeof(u4d); - break; - case 0x9f: - table = u9f; - size = sizeof(u9f); - 
break; - case 0xa4: - table = ua4; - size = sizeof(ua4); - break; - case 0xd7: - table = ud7; - size = sizeof(ud7); - break; - case 0xfa: - table = ufa; - size = sizeof(ufa); - break; - case 0xfb: - table = ufb; - size = sizeof(ufb); - break; - case 0xfd: - table = ufd; - size = sizeof(ufd); - break; - case 0xfe: - table = ufe; - size = sizeof(ufe); - break; - case 0xff: - table = uff; - size = sizeof(uff); - break; - case 0x103: - table = u103; - size = sizeof(u103); - break; - case 0x104: - table = u104; - size = sizeof(u104); - break; - case 0x1d0: - table = u1d0; - size = sizeof(u1d0); - break; - case 0x1d1: - table = u1d1; - size = sizeof(u1d1); - break; - case 0x1d4: - table = u1d4; - size = sizeof(u1d4); - break; - case 0x1d5: - table = u1d5; - size = sizeof(u1d5); - break; - case 0x1d6: - table = u1d6; - size = sizeof(u1d6); - break; - case 0x1d7: - table = u1d7; - size = sizeof(u1d7); - break; - case 0x2a6: - table = u2a6; - size = sizeof(u2a6); - break; - case 0x2fa: - table = u2fa; - size = sizeof(u2fa); - break; - case 0xe00: - table = ue00; - size = sizeof(ue00); - break; - case 0xfff: - table = ufff; - size = sizeof(ufff); - break; - case 0x10ff: - table = u10ff; - size = sizeof(u10ff); - break; - default: - return 0; - } - /* we have narrowed down to a section of 256 characters to check */ - /* now check if c matches the printuation wide-chars within that section */ - ptr = (unsigned char *)table; - ctmp = (unsigned char)c; - while (ptr < table + size) + if (ctmp < *ptr) + return 0; + /* otherwise c > *ptr */ + /* look for 0x0 as next element which indicates a range */ + ++ptr; + if (*ptr == 0x0) { - if (ctmp == *ptr) + /* we have a range..see if c falls within range */ + ++ptr; + if (ctmp <= *ptr) return 1; - if (ctmp < *ptr) - return 0; - /* otherwise c > *ptr */ - /* look for 0x0 as next element which indicates a range */ ++ptr; - if (*ptr == 0x0) - { - /* we have a range..see if c falls within range */ - ++ptr; - if (ctmp <= *ptr) - return 1; - 
++ptr; - } } - /* not in table */ - return 0; } -#endif /* _MB_CAPABLE */ - + /* not in table */ + return 0; +#else return (c < (wint_t)0x100 ? isprint (c) : 0); +#endif /* _MB_CAPABLE */ } diff --git a/newlib/libc/ctype/iswpunct.c b/newlib/libc/ctype/iswpunct.c index 1f19b66..fc0bd63 100644 --- a/newlib/libc/ctype/iswpunct.c +++ b/newlib/libc/ctype/iswpunct.c @@ -70,260 +70,243 @@ int _DEFUN(iswpunct,(c), wint_t c) { #ifdef _MB_CAPABLE - int unicode = 0; + unsigned const char *table; + unsigned char *ptr; + unsigned char ctmp; + int size; + wint_t x; if (!strcmp (__locale_charset (), "JIS")) - { - c = __jp2uc (c, JP_JIS); - unicode = 1; - } + c = __jp2uc (c, JP_JIS); else if (!strcmp (__locale_charset (), "SJIS")) - { - c = __jp2uc (c, JP_SJIS); - unicode = 1; - } + c = __jp2uc (c, JP_SJIS); else if (!strcmp (__locale_charset (), "EUCJP")) + c = __jp2uc (c, JP_EUCJP); + + x = (c >> 8); + /* for some large sections, all characters are punctuation so handle them here */ + if ((x >= 0xe0 && x <= 0xf8) || + (x >= 0xf00 && x <= 0xffe) || + (x >= 0x1000 && x <= 0x10fe)) + return 1; + + switch (x) { - c = __jp2uc (c, JP_EUCJP); - unicode = 1; - } - else if (!strcmp (__locale_charset (), "UTF-8")) - { - unicode = 1; + case 0x22: + case 0x25: + case 0x28: + case 0x29: + case 0x2a: + return 1; + case 0x00: + table = u0; + size = sizeof(u0); + break; + case 0x02: + table = u2; + size = sizeof(u2); + break; + case 0x03: + table = u3; + size = sizeof(u3); + break; + case 0x04: + table = u4; + size = sizeof(u4); + break; + case 0x05: + table = u5; + size = sizeof(u5); + break; + case 0x06: + table = u6; + size = sizeof(u6); + break; + case 0x07: + table = u7; + size = sizeof(u7); + break; + case 0x09: + table = u9; + size = sizeof(u9); + break; + case 0x0a: + table = ua; + size = sizeof(ua); + break; + case 0x0b: + table = ub; + size = sizeof(ub); + break; + case 0x0c: + table = uc; + size = sizeof(uc); + break; + case 0x0d: + table = ud; + size = sizeof(ud); + break; + case 
0x0e: + table = ue; + size = sizeof(ue); + break; + case 0x0f: + table = uf; + size = sizeof(uf); + break; + case 0x10: + table = u10; + size = sizeof(u10); + break; + case 0x13: + table = u13; + size = sizeof(u13); + break; + case 0x16: + table = u16; + size = sizeof(u16); + break; + case 0x17: + table = u17; + size = sizeof(u17); + break; + case 0x18: + table = u18; + size = sizeof(u18); + break; + case 0x1f: + table = u1f; + size = sizeof(u1f); + break; + case 0x20: + table = u20; + size = sizeof(u20); + break; + case 0x21: + table = u21; + size = sizeof(u21); + break; + case 0x23: + table = u23; + size = sizeof(u23); + break; + case 0x24: + table = u24; + size = sizeof(u24); + break; + case 0x26: + table = u26; + size = sizeof(u26); + break; + case 0x27: + table = u27; + size = sizeof(u27); + break; + case 0x2e: + table = u2e; + size = sizeof(u2e); + break; + case 0x2f: + table = u2f; + size = sizeof(u2f); + break; + case 0x30: + table = u30; + size = sizeof(u30); + break; + case 0x31: + table = u31; + size = sizeof(u31); + break; + case 0x32: + table = u32; + size = sizeof(u32); + break; + case 0x33: + table = u33; + size = sizeof(u33); + break; + case 0xa4: + table = ua4; + size = sizeof(ua4); + break; + case 0xfb: + table = ufb; + size = sizeof(ufb); + break; + case 0xfd: + table = ufd; + size = sizeof(ufd); + break; + case 0xfe: + table = ufe; + size = sizeof(ufe); + break; + case 0xff: + table = uff; + size = sizeof(uff); + break; + case 0x103: + table = u103; + size = sizeof(u103); + break; + case 0x1d0: + table = u1d0; + size = sizeof(u1d0); + break; + case 0x1d1: + table = u1d1; + size = sizeof(u1d1); + break; + case 0x1d6: + table = u1d6; + size = sizeof(u1d6); + break; + case 0x1d7: + table = u1d7; + size = sizeof(u1d7); + break; + case 0xe00: + table = ue00; + size = sizeof(ue00); + break; + case 0xfff: + table = ufff; + size = sizeof(ufff); + break; + case 0x10ff: + table = u10ff; + size = sizeof(u10ff); + break; + default: + return 0; } - - if 
(unicode) + /* we have narrowed down to a section of 256 characters to check */ + /* now check if c matches the punctuation wide-chars within that section */ + ptr = (unsigned char *)table; + ctmp = (unsigned char)c; + while (ptr < table + size) { - unsigned const char *table; - unsigned char *ptr; - unsigned char ctmp; - int size; - wint_t x = (c >> 8); - - /* for some large sections, all characters are punctuation so handle them here */ - if ((x >= 0xe0 && x <= 0xf8) || - (x >= 0xf00 && x <= 0xffe) || - (x >= 0x1000 && x <= 0x10fe)) + if (ctmp == *ptr) return 1; - - switch (x) - { - case 0x22: - case 0x25: - case 0x28: - case 0x29: - case 0x2a: - return 1; - case 0x00: - table = u0; - size = sizeof(u0); - break; - case 0x02: - table = u2; - size = sizeof(u2); - break; - case 0x03: - table = u3; - size = sizeof(u3); - break; - case 0x04: - table = u4; - size = sizeof(u4); - break; - case 0x05: - table = u5; - size = sizeof(u5); - break; - case 0x06: - table = u6; - size = sizeof(u6); - break; - case 0x07: - table = u7; - size = sizeof(u7); - break; - case 0x09: - table = u9; - size = sizeof(u9); - break; - case 0x0a: - table = ua; - size = sizeof(ua); - break; - case 0x0b: - table = ub; - size = sizeof(ub); - break; - case 0x0c: - table = uc; - size = sizeof(uc); - break; - case 0x0d: - table = ud; - size = sizeof(ud); - break; - case 0x0e: - table = ue; - size = sizeof(ue); - break; - case 0x0f: - table = uf; - size = sizeof(uf); - break; - case 0x10: - table = u10; - size = sizeof(u10); - break; - case 0x13: - table = u13; - size = sizeof(u13); - break; - case 0x16: - table = u16; - size = sizeof(u16); - break; - case 0x17: - table = u17; - size = sizeof(u17); - break; - case 0x18: - table = u18; - size = sizeof(u18); - break; - case 0x1f: - table = u1f; - size = sizeof(u1f); - break; - case 0x20: - table = u20; - size = sizeof(u20); - break; - case 0x21: - table = u21; - size = sizeof(u21); - break; - case 0x23: - table = u23; - size = sizeof(u23); - break; - 
case 0x24: - table = u24; - size = sizeof(u24); - break; - case 0x26: - table = u26; - size = sizeof(u26); - break; - case 0x27: - table = u27; - size = sizeof(u27); - break; - case 0x2e: - table = u2e; - size = sizeof(u2e); - break; - case 0x2f: - table = u2f; - size = sizeof(u2f); - break; - case 0x30: - table = u30; - size = sizeof(u30); - break; - case 0x31: - table = u31; - size = sizeof(u31); - break; - case 0x32: - table = u32; - size = sizeof(u32); - break; - case 0x33: - table = u33; - size = sizeof(u33); - break; - case 0xa4: - table = ua4; - size = sizeof(ua4); - break; - case 0xfb: - table = ufb; - size = sizeof(ufb); - break; - case 0xfd: - table = ufd; - size = sizeof(ufd); - break; - case 0xfe: - table = ufe; - size = sizeof(ufe); - break; - case 0xff: - table = uff; - size = sizeof(uff); - break; - case 0x103: - table = u103; - size = sizeof(u103); - break; - case 0x1d0: - table = u1d0; - size = sizeof(u1d0); - break; - case 0x1d1: - table = u1d1; - size = sizeof(u1d1); - break; - case 0x1d6: - table = u1d6; - size = sizeof(u1d6); - break; - case 0x1d7: - table = u1d7; - size = sizeof(u1d7); - break; - case 0xe00: - table = ue00; - size = sizeof(ue00); - break; - case 0xfff: - table = ufff; - size = sizeof(ufff); - break; - case 0x10ff: - table = u10ff; - size = sizeof(u10ff); - break; - default: - return 0; - } - /* we have narrowed down to a section of 256 characters to check */ - /* now check if c matches the punctuation wide-chars within that section */ - ptr = (unsigned char *)table; - ctmp = (unsigned char)c; - while (ptr < table + size) + if (ctmp < *ptr) + return 0; + /* otherwise c > *ptr */ + /* look for 0x0 as next element which indicates a range */ + ++ptr; + if (*ptr == 0x0) { - if (ctmp == *ptr) + /* we have a range..see if c falls within range */ + ++ptr; + if (ctmp <= *ptr) return 1; - if (ctmp < *ptr) - return 0; - /* otherwise c > *ptr */ - /* look for 0x0 as next element which indicates a range */ ++ptr; - if (*ptr == 0x0) - { - 
/* we have a range..see if c falls within range */ - ++ptr; - if (ctmp <= *ptr) - return 1; - ++ptr; - } } - /* not in table */ - return 0; } -#endif /* _MB_CAPABLE */ - + /* not in table */ + return 0; +#else return (c < (wint_t)0x100 ? ispunct (c) : 0); +#endif /* _MB_CAPABLE */ } diff --git a/newlib/libc/ctype/iswspace.c b/newlib/libc/ctype/iswspace.c index b9c7439..100e782 100644 --- a/newlib/libc/ctype/iswspace.c +++ b/newlib/libc/ctype/iswspace.c @@ -66,38 +66,19 @@ int _DEFUN(iswspace,(c), wint_t c) { #ifdef _MB_CAPABLE - int unicode = 0; - if (!strcmp (__locale_charset (), "JIS")) - { - c = __jp2uc (c, JP_JIS); - unicode = 1; - } + c = __jp2uc (c, JP_JIS); else if (!strcmp (__locale_charset (), "SJIS")) - { - c = __jp2uc (c, JP_SJIS); - unicode = 1; - } + c = __jp2uc (c, JP_SJIS); else if (!strcmp (__locale_charset (), "EUCJP")) - { - c = __jp2uc (c, JP_EUCJP); - unicode = 1; - } - else if (!strcmp (__locale_charset (), "UTF-8")) - { - unicode = 1; - } - - if (unicode) - { - return ((c >= 0x0009 && c <= 0x000d) || c == 0x0020 || c == 0x1680 || - (c >= 0x2000 && c <= 0x2006) || - (c >= 0x2008 && c <= 0x200b) || - c == 0x2028 || c == 0x2029 || - c == 0x205f || c == 0x3000); - } -#endif /* _MB_CAPABLE */ - + c = __jp2uc (c, JP_EUCJP); + return ((c >= 0x0009 && c <= 0x000d) || c == 0x0020 || c == 0x1680 || + (c >= 0x2000 && c <= 0x2006) || + (c >= 0x2008 && c <= 0x200b) || + c == 0x2028 || c == 0x2029 || + c == 0x205f || c == 0x3000); +#else return (c < 0x100 ? isspace (c) : 0); +#endif /* _MB_CAPABLE */ } diff --git a/newlib/libc/ctype/jp2uc.c b/newlib/libc/ctype/jp2uc.c index 9d26a7a..d109644 100644 --- a/newlib/libc/ctype/jp2uc.c +++ b/newlib/libc/ctype/jp2uc.c @@ -41,6 +41,11 @@ wint_t _DEFUN (__jp2uc, (c, type), wint_t c _AND int type) { +/* Under Cygwin, the incoming wide character is already given in UTF due + to the requirements of the underlying OS. 
*/ +#ifdef __CYGWIN__ + return c; +#else int index, adj; unsigned char byte1, byte2; wint_t ret; @@ -140,6 +145,7 @@ _DEFUN (__jp2uc, (c, type), wint_t c _AND int type) } return WEOF; +#endif } #endif /* _MB_CAPABLE */ diff --git a/newlib/libc/ctype/towlower.c b/newlib/libc/ctype/towlower.c index c3fcb77..edda8ca 100644 --- a/newlib/libc/ctype/towlower.c +++ b/newlib/libc/ctype/towlower.c @@ -70,405 +70,388 @@ wint_t _DEFUN(towlower,(c), wint_t c) { #ifdef _MB_CAPABLE - int unicode = 0; - if (!strcmp (__locale_charset (), "JIS")) - { - c = __jp2uc (c, JP_JIS); - unicode = 1; - } + c = __jp2uc (c, JP_JIS); else if (!strcmp (__locale_charset (), "SJIS")) - { - c = __jp2uc (c, JP_SJIS); - unicode = 1; - } + c = __jp2uc (c, JP_SJIS); else if (!strcmp (__locale_charset (), "EUCJP")) + c = __jp2uc (c, JP_EUCJP); + + if (c < 0x100) { - c = __jp2uc (c, JP_EUCJP); - unicode = 1; - } - else if (!strcmp (__locale_charset (), "UTF-8")) - { - unicode = 1; - } + if ((c >= 0x0041 && c <= 0x005a) || + (c >= 0x00c0 && c <= 0x00de)) + return (c + 0x20); - if (unicode) + if (c == 0x00b5) + return 0x03bc; + + return c; + } + else if (c < 0x300) { - if (c < 0x100) + if ((c >= 0x0100 && c <= 0x012e) || + (c >= 0x0132 && c <= 0x0136) || + (c >= 0x014a && c <= 0x0176) || + (c >= 0x01de && c <= 0x01ee) || + (c >= 0x01f8 && c <= 0x021e) || + (c >= 0x0222 && c <= 0x0232)) { - if ((c >= 0x0041 && c <= 0x005a) || - (c >= 0x00c0 && c <= 0x00de)) - return (c + 0x20); + if (!(c & 0x01)) + return (c + 1); + return c; + } - if (c == 0x00b5) - return 0x03bc; - + if ((c >= 0x0139 && c <= 0x0147) || + (c >= 0x01cd && c <= 0x91db)) + { + if (c & 0x01) + return (c + 1); return c; } - else if (c < 0x300) + + if (c >= 0x178 && c <= 0x01f7) { - if ((c >= 0x0100 && c <= 0x012e) || - (c >= 0x0132 && c <= 0x0136) || - (c >= 0x014a && c <= 0x0176) || - (c >= 0x01de && c <= 0x01ee) || - (c >= 0x01f8 && c <= 0x021e) || - (c >= 0x0222 && c <= 0x0232)) + wint_t k; + switch (c) { - if (!(c & 0x01)) - return (c + 
1); - return c; + case 0x0178: + k = 0x00ff; + break; + case 0x0179: + case 0x017b: + case 0x017d: + case 0x0182: + case 0x0184: + case 0x0187: + case 0x018b: + case 0x0191: + case 0x0198: + case 0x01a0: + case 0x01a2: + case 0x01a4: + case 0x01a7: + case 0x01ac: + case 0x01af: + case 0x01b3: + case 0x01b5: + case 0x01b8: + case 0x01bc: + case 0x01c5: + case 0x01c8: + case 0x01cb: + case 0x01cd: + case 0x01cf: + case 0x01d1: + case 0x01d3: + case 0x01d5: + case 0x01d7: + case 0x01d9: + case 0x01db: + case 0x01f2: + case 0x01f4: + k = c + 1; + break; + case 0x017f: + k = 0x0073; + break; + case 0x0181: + k = 0x0253; + break; + case 0x0186: + k = 0x0254; + break; + case 0x0189: + k = 0x0256; + break; + case 0x018a: + k = 0x0257; + break; + case 0x018e: + k = 0x01dd; + break; + case 0x018f: + k = 0x0259; + break; + case 0x0190: + k = 0x025b; + break; + case 0x0193: + k = 0x0260; + break; + case 0x0194: + k = 0x0263; + break; + case 0x0196: + k = 0x0269; + break; + case 0x0197: + k = 0x0268; + break; + case 0x019c: + k = 0x026f; + break; + case 0x019d: + k = 0x0272; + break; + case 0x019f: + k = 0x0275; + break; + case 0x01a6: + k = 0x0280; + break; + case 0x01a9: + k = 0x0283; + break; + case 0x01ae: + k = 0x0288; + break; + case 0x01b1: + k = 0x028a; + break; + case 0x01b2: + k = 0x028b; + break; + case 0x01b7: + k = 0x0292; + break; + case 0x01c4: + case 0x01c7: + case 0x01ca: + case 0x01f1: + k = c + 2; + break; + case 0x01f6: + k = 0x0195; + break; + case 0x01f7: + k = 0x01bf; + break; + default: + k = 0; } + if (k != 0) + return k; + } - if ((c >= 0x0139 && c <= 0x0147) || - (c >= 0x01cd && c <= 0x91db)) - { - if (c & 0x01) - return (c + 1); - return c; - } - - if (c >= 0x178 && c <= 0x01f7) + if (c == 0x0220) + return 0x019e; + } + else if (c < 0x0400) + { + if (c >= 0x0391 && c <= 0x03ab && c != 0x03a2) + return (c + 0x20); + if (c >= 0x03d8 && c <= 0x03ee && !(c & 0x01)) + return (c + 1); + if (c >= 0x0386 && c <= 0x03f5) + { + wint_t k; + switch (c) { - 
wint_t k; - switch (c) - { - case 0x0178: - k = 0x00ff; - break; - case 0x0179: - case 0x017b: - case 0x017d: - case 0x0182: - case 0x0184: - case 0x0187: - case 0x018b: - case 0x0191: - case 0x0198: - case 0x01a0: - case 0x01a2: - case 0x01a4: - case 0x01a7: - case 0x01ac: - case 0x01af: - case 0x01b3: - case 0x01b5: - case 0x01b8: - case 0x01bc: - case 0x01c5: - case 0x01c8: - case 0x01cb: - case 0x01cd: - case 0x01cf: - case 0x01d1: - case 0x01d3: - case 0x01d5: - case 0x01d7: - case 0x01d9: - case 0x01db: - case 0x01f2: - case 0x01f4: - k = c + 1; - break; - case 0x017f: - k = 0x0073; - break; - case 0x0181: - k = 0x0253; - break; - case 0x0186: - k = 0x0254; - break; - case 0x0189: - k = 0x0256; - break; - case 0x018a: - k = 0x0257; - break; - case 0x018e: - k = 0x01dd; - break; - case 0x018f: - k = 0x0259; - break; - case 0x0190: - k = 0x025b; - break; - case 0x0193: - k = 0x0260; - break; - case 0x0194: - k = 0x0263; - break; - case 0x0196: - k = 0x0269; - break; - case 0x0197: - k = 0x0268; - break; - case 0x019c: - k = 0x026f; - break; - case 0x019d: - k = 0x0272; - break; - case 0x019f: - k = 0x0275; - break; - case 0x01a6: - k = 0x0280; - break; - case 0x01a9: - k = 0x0283; - break; - case 0x01ae: - k = 0x0288; - break; - case 0x01b1: - k = 0x028a; - break; - case 0x01b2: - k = 0x028b; - break; - case 0x01b7: - k = 0x0292; - break; - case 0x01c4: - case 0x01c7: - case 0x01ca: - case 0x01f1: - k = c + 2; - break; - case 0x01f6: - k = 0x0195; - break; - case 0x01f7: - k = 0x01bf; - break; - default: - k = 0; - } - if (k != 0) - return k; + case 0x0386: + k = 0x03ac; + break; + case 0x0388: + k = 0x03ad; + break; + case 0x0389: + k = 0x03ae; + break; + case 0x038a: + k = 0x03af; + break; + case 0x038c: + k = 0x03cc; + break; + case 0x038e: + k = 0x03cd; + break; + case 0x038f: + k = 0x038f; + break; + case 0x03c2: + k = 0x03c3; + break; + case 0x03d0: + k = 0x03b2; + break; + case 0x03d1: + k = 0x03b8; + break; + case 0x03d5: + k = 0x03c6; + break; + case 
0x03d6: + k = 0x03c0; + break; + case 0x03f0: + k = 0x03ba; + break; + case 0x03f1: + k = 0x03c1; + break; + case 0x03f2: + k = 0x03c3; + break; + case 0x03f4: + k = 0x03b8; + break; + case 0x03f5: + k = 0x03b5; + break; + default: + k = 0; } - - if (c == 0x0220) - return 0x019e; + if (k != 0) + return k; } - else if (c < 0x0400) + + if (c == 0x0345) + return 0x03b9; + } + else if (c < 0x500) + { + if (c >= 0x0400 && c <= 0x040f) + return (c + 0x50); + + if (c >= 0x0410 && c <= 0x042f) + return (c + 0x20); + + if ((c >= 0x0460 && c <= 0x0480) || + (c >= 0x048a && c <= 0x04be) || + (c >= 0x04d0 && c <= 0x04f4) || + (c == 0x04f8)) { - if (c >= 0x0391 && c <= 0x03ab && c != 0x03a2) - return (c + 0x20); - if (c >= 0x03d8 && c <= 0x03ee && !(c & 0x01)) + if (!(c & 0x01)) return (c + 1); - if (c >= 0x0386 && c <= 0x03f5) - { - wint_t k; - switch (c) - { - case 0x0386: - k = 0x03ac; - break; - case 0x0388: - k = 0x03ad; - break; - case 0x0389: - k = 0x03ae; - break; - case 0x038a: - k = 0x03af; - break; - case 0x038c: - k = 0x03cc; - break; - case 0x038e: - k = 0x03cd; - break; - case 0x038f: - k = 0x038f; - break; - case 0x03c2: - k = 0x03c3; - break; - case 0x03d0: - k = 0x03b2; - break; - case 0x03d1: - k = 0x03b8; - break; - case 0x03d5: - k = 0x03c6; - break; - case 0x03d6: - k = 0x03c0; - break; - case 0x03f0: - k = 0x03ba; - break; - case 0x03f1: - k = 0x03c1; - break; - case 0x03f2: - k = 0x03c3; - break; - case 0x03f4: - k = 0x03b8; - break; - case 0x03f5: - k = 0x03b5; - break; - default: - k = 0; - } - if (k != 0) - return k; - } - - if (c == 0x0345) - return 0x03b9; + return c; } - else if (c < 0x500) + + if (c >= 0x04c1 && c <= 0x04cd) { - if (c >= 0x0400 && c <= 0x040f) - return (c + 0x50); - - if (c >= 0x0410 && c <= 0x042f) - return (c + 0x20); - - if ((c >= 0x0460 && c <= 0x0480) || - (c >= 0x048a && c <= 0x04be) || - (c >= 0x04d0 && c <= 0x04f4) || - (c == 0x04f8)) - { - if (!(c & 0x01)) - return (c + 1); - return c; - } - - if (c >= 0x04c1 && c <= 
0x04cd) - { - if (c & 0x01) - return (c + 1); - return c; - } + if (c & 0x01) + return (c + 1); + return c; } - else if (c < 0x1f00) + } + else if (c < 0x1f00) + { + if ((c >= 0x0500 && c <= 0x050e) || + (c >= 0x1e00 && c <= 0x1e94) || + (c >= 0x1ea0 && c <= 0x1ef8)) { - if ((c >= 0x0500 && c <= 0x050e) || - (c >= 0x1e00 && c <= 0x1e94) || - (c >= 0x1ea0 && c <= 0x1ef8)) - { - if (!(c & 0x01)) - return (c + 1); - return c; - } - - if (c >= 0x0531 && c <= 0x0556) - return (c + 0x30); - - if (c == 0x1e9b) - return 0x1e61; + if (!(c & 0x01)) + return (c + 1); + return c; } - else if (c < 0x2000) + + if (c >= 0x0531 && c <= 0x0556) + return (c + 0x30); + + if (c == 0x1e9b) + return 0x1e61; + } + else if (c < 0x2000) + { + if ((c >= 0x1f08 && c <= 0x1f0f) || + (c >= 0x1f18 && c <= 0x1f1d) || + (c >= 0x1f28 && c <= 0x1f2f) || + (c >= 0x1f38 && c <= 0x1f3f) || + (c >= 0x1f48 && c <= 0x1f4d) || + (c >= 0x1f68 && c <= 0x1f6f) || + (c >= 0x1f88 && c <= 0x1f8f) || + (c >= 0x1f98 && c <= 0x1f9f) || + (c >= 0x1fa8 && c <= 0x1faf)) + return (c - 0x08); + + if (c >= 0x1f59 && c <= 0x1f5f) { - if ((c >= 0x1f08 && c <= 0x1f0f) || - (c >= 0x1f18 && c <= 0x1f1d) || - (c >= 0x1f28 && c <= 0x1f2f) || - (c >= 0x1f38 && c <= 0x1f3f) || - (c >= 0x1f48 && c <= 0x1f4d) || - (c >= 0x1f68 && c <= 0x1f6f) || - (c >= 0x1f88 && c <= 0x1f8f) || - (c >= 0x1f98 && c <= 0x1f9f) || - (c >= 0x1fa8 && c <= 0x1faf)) + if (c & 0x01) return (c - 0x08); - - if (c >= 0x1f59 && c <= 0x1f5f) - { - if (c & 0x01) - return (c - 0x08); - return c; - } - - if (c >= 0x1fb8 && c <= 0x1ffc) + return c; + } + + if (c >= 0x1fb8 && c <= 0x1ffc) + { + wint_t k; + switch (c) { - wint_t k; - switch (c) - { - case 0x1fb8: - case 0x1fb9: - case 0x1fd8: - case 0x1fd9: - case 0x1fe8: - case 0x1fe9: - k = c - 0x08; - break; - case 0x1fba: - case 0x1fbb: - k = c - 0x4a; - break; - case 0x1fbc: - k = 0x1fb3; - break; - case 0x1fbe: - k = 0x03b9; - break; - case 0x1fc8: - case 0x1fc9: - case 0x1fca: - case 0x1fcb: - k = c - 0x56; 
- break; - case 0x1fcc: - k = 0x1fc3; - break; - case 0x1fda: - case 0x1fdb: - k = c - 0x64; - break; - case 0x1fea: - case 0x1feb: - k = c - 0x70; - break; - case 0x1fec: - k = 0x1fe5; - break; - case 0x1ffa: - case 0x1ffb: - k = c - 0x7e; - break; - case 0x1ffc: - k = 0x1ff3; - break; - default: - k = 0; - } - if (k != 0) - return k; + case 0x1fb8: + case 0x1fb9: + case 0x1fd8: + case 0x1fd9: + case 0x1fe8: + case 0x1fe9: + k = c - 0x08; + break; + case 0x1fba: + case 0x1fbb: + k = c - 0x4a; + break; + case 0x1fbc: + k = 0x1fb3; + break; + case 0x1fbe: + k = 0x03b9; + break; + case 0x1fc8: + case 0x1fc9: + case 0x1fca: + case 0x1fcb: + k = c - 0x56; + break; + case 0x1fcc: + k = 0x1fc3; + break; + case 0x1fda: + case 0x1fdb: + k = c - 0x64; + break; + case 0x1fea: + case 0x1feb: + k = c - 0x70; + break; + case 0x1fec: + k = 0x1fe5; + break; + case 0x1ffa: + case 0x1ffb: + k = c - 0x7e; + break; + case 0x1ffc: + k = 0x1ff3; + break; + default: + k = 0; } + if (k != 0) + return k; } - else - { - if (c >= 0x2160 && c <= 0x216f) - return (c + 0x10); - - if (c >= 0x24b6 && c <= 0x24cf) - return (c + 0x1a); - - if (c >= 0xff21 && c <= 0xff3a) - return (c + 0x20); - - if (c >= 0x10400 && c <= 0x10425) - return (c + 0x28); + } + else + { + if (c >= 0x2160 && c <= 0x216f) + return (c + 0x10); + + if (c >= 0x24b6 && c <= 0x24cf) + return (c + 0x1a); + + if (c >= 0xff21 && c <= 0xff3a) + return (c + 0x20); + + if (c >= 0x10400 && c <= 0x10425) + return (c + 0x28); - if (c == 0x2126) - return 0x03c9; - if (c == 0x212a) - return 0x006b; - if (c == 0x212b) - return 0x00e5; - } - } -#endif /* _MB_CAPABLE */ - + if (c == 0x2126) + return 0x03c9; + if (c == 0x212a) + return 0x006b; + if (c == 0x212b) + return 0x00e5; + } + return c; +#else return (c < 0x00ff ? 
(wint_t)(tolower ((int)c)) : c); +#endif /* _MB_CAPABLE */ } diff --git a/newlib/libc/ctype/towupper.c b/newlib/libc/ctype/towupper.c index 385dc9b..dee9468 100644 --- a/newlib/libc/ctype/towupper.c +++ b/newlib/libc/ctype/towupper.c @@ -70,432 +70,415 @@ wint_t _DEFUN(towupper,(c), wint_t c) { #ifdef _MB_CAPABLE - int unicode = 0; - if (!strcmp (__locale_charset (), "JIS")) - { - c = __jp2uc (c, JP_JIS); - unicode = 1; - } + c = __jp2uc (c, JP_JIS); else if (!strcmp (__locale_charset (), "SJIS")) - { - c = __jp2uc (c, JP_SJIS); - unicode = 1; - } + c = __jp2uc (c, JP_SJIS); else if (!strcmp (__locale_charset (), "EUCJP")) + c = __jp2uc (c, JP_EUCJP); + + if (c < 0x100) { - c = __jp2uc (c, JP_EUCJP); - unicode = 1; - } - else if (!strcmp (__locale_charset (), "UTF-8")) - { - unicode = 1; + if (c == 0x00b5) + return 0x039c; + + if ((c >= 0x00e0 && c <= 0x00fe) || + (c >= 0x0061 && c <= 0x007a)) + return (c - 0x20); + + if (c == 0xff) + return 0x0178; + + return c; } - - if (unicode) + else if (c < 0x300) { - if (c < 0x100) + if ((c >= 0x0101 && c <= 0x012f) || + (c >= 0x0133 && c <= 0x0137) || + (c >= 0x014b && c <= 0x0177) || + (c >= 0x01df && c <= 0x01ef) || + (c >= 0x01f9 && c <= 0x021f) || + (c >= 0x0223 && c <= 0x0233)) { - if (c == 0x00b5) - return 0x039c; - - if ((c >= 0x00e0 && c <= 0x00fe) || - (c >= 0x0061 && c <= 0x007a)) - return (c - 0x20); - - if (c == 0xff) - return 0x0178; - + if (c & 0x01) + return (c - 1); return c; } - else if (c < 0x300) - { - if ((c >= 0x0101 && c <= 0x012f) || - (c >= 0x0133 && c <= 0x0137) || - (c >= 0x014b && c <= 0x0177) || - (c >= 0x01df && c <= 0x01ef) || - (c >= 0x01f9 && c <= 0x021f) || - (c >= 0x0223 && c <= 0x0233)) - { - if (c & 0x01) - return (c - 1); - return c; - } - if ((c >= 0x013a && c <= 0x0148) || - (c >= 0x01ce && c <= 0x1dc)) - { - if (!(c & 0x01)) - return (c - 1); - return c; - } - - if (c == 0x0131) - return 0x0049; - - if (c == 0x017a || c == 0x017c || c == 0x017e) + if ((c >= 0x013a && c <= 0x0148) || + 
(c >= 0x01ce && c <= 0x1dc)) + { + if (!(c & 0x01)) return (c - 1); - - if (c >= 0x017f && c <= 0x0292) - { - wint_t k; - switch (c) - { - case 0x017f: - k = 0x0053; - break; - case 0x0183: - k = 0x0182; - break; - case 0x0185: - k = 0x0184; - break; - case 0x0188: - k = 0x0187; - break; - case 0x018c: - k = 0x018b; - break; - case 0x0192: - k = 0x0191; - break; - case 0x0195: - k = 0x01f6; - break; - case 0x0199: - k = 0x0198; - break; - case 0x019e: - k = 0x0220; - break; - case 0x01a1: - case 0x01a3: - case 0x01a5: - case 0x01a8: - case 0x01ad: - case 0x01b0: - case 0x01b4: - case 0x01b6: - case 0x01b9: - case 0x01bd: - case 0x01c5: - case 0x01c8: - case 0x01cb: - case 0x01f2: - case 0x01f5: - k = c - 1; - break; - case 0x01bf: - k = 0x01f7; - break; - case 0x01c6: - case 0x01c9: - case 0x01cc: - k = c - 2; - break; - case 0x01dd: - k = 0x018e; - break; - case 0x01f3: - k = 0x01f1; - break; - case 0x0253: - k = 0x0181; - break; - case 0x0254: - k = 0x0186; - break; - case 0x0256: - k = 0x0189; - break; - case 0x0257: - k = 0x018a; - break; - case 0x0259: - k = 0x018f; - break; - case 0x025b: - k = 0x0190; - break; - case 0x0260: - k = 0x0193; - break; - case 0x0263: - k = 0x0194; - break; - case 0x0268: - k = 0x0197; - break; - case 0x0269: - k = 0x0196; - break; - case 0x026f: - k = 0x019c; - break; - case 0x0272: - k = 0x019d; - break; - case 0x0275: - k = 0x019f; - break; - case 0x0280: - k = 0x01a6; - break; - case 0x0283: - k = 0x01a9; - break; - case 0x0288: - k = 0x01ae; - break; - case 0x028a: - k = 0x01b1; - break; - case 0x028b: - k = 0x01b2; - break; - case 0x0292: - k = 0x01b7; - break; - default: - k = 0; - } - if (k != 0) - return k; - } + return c; } - else if (c < 0x0400) + + if (c == 0x0131) + return 0x0049; + + if (c == 0x017a || c == 0x017c || c == 0x017e) + return (c - 1); + + if (c >= 0x017f && c <= 0x0292) { - if (c == 0x03ac) - return 0x0386; - - if ((c & 0xfff0) == 0x03a0 && c >= 0x03ad) - return (c - 0x15); - - if (c >= 0x03b1 && c <= 
0x03cb && c != 0x03c2) - return (c - 0x20); - - if (c == 0x03c2) - return 0x03a3; - - if (c >= 0x03cc && c <= 0x03f5) + wint_t k; + switch (c) { - wint_t k; - switch (c) - { - case 0x03cc: - k = 0x038c; - break; - case 0x03cd: - case 0x03ce: - k = c - 0x3f; - break; - case 0x03d0: - k = 0x0392; - break; - case 0x03d1: - k = 0x0398; - break; - case 0x03d5: - k = 0x03a6; - break; - case 0x03d6: - k = 0x03a0; - break; - case 0x03d9: - case 0x03db: - case 0x03dd: - case 0x03df: - case 0x03e1: - case 0x03e3: - case 0x03e5: - case 0x03e7: - case 0x03e9: - case 0x03eb: - case 0x03ed: - case 0x03ef: - k = c - 1; - break; - case 0x03f0: - k = 0x039a; - break; - case 0x03f1: - k = 0x03a1; - break; - case 0x03f2: - k = 0x03a3; - break; - case 0x03f5: - k = 0x0395; - break; - default: - k = 0; - } - if (k != 0) - return k; + case 0x017f: + k = 0x0053; + break; + case 0x0183: + k = 0x0182; + break; + case 0x0185: + k = 0x0184; + break; + case 0x0188: + k = 0x0187; + break; + case 0x018c: + k = 0x018b; + break; + case 0x0192: + k = 0x0191; + break; + case 0x0195: + k = 0x01f6; + break; + case 0x0199: + k = 0x0198; + break; + case 0x019e: + k = 0x0220; + break; + case 0x01a1: + case 0x01a3: + case 0x01a5: + case 0x01a8: + case 0x01ad: + case 0x01b0: + case 0x01b4: + case 0x01b6: + case 0x01b9: + case 0x01bd: + case 0x01c5: + case 0x01c8: + case 0x01cb: + case 0x01f2: + case 0x01f5: + k = c - 1; + break; + case 0x01bf: + k = 0x01f7; + break; + case 0x01c6: + case 0x01c9: + case 0x01cc: + k = c - 2; + break; + case 0x01dd: + k = 0x018e; + break; + case 0x01f3: + k = 0x01f1; + break; + case 0x0253: + k = 0x0181; + break; + case 0x0254: + k = 0x0186; + break; + case 0x0256: + k = 0x0189; + break; + case 0x0257: + k = 0x018a; + break; + case 0x0259: + k = 0x018f; + break; + case 0x025b: + k = 0x0190; + break; + case 0x0260: + k = 0x0193; + break; + case 0x0263: + k = 0x0194; + break; + case 0x0268: + k = 0x0197; + break; + case 0x0269: + k = 0x0196; + break; + case 0x026f: + k = 
0x019c; + break; + case 0x0272: + k = 0x019d; + break; + case 0x0275: + k = 0x019f; + break; + case 0x0280: + k = 0x01a6; + break; + case 0x0283: + k = 0x01a9; + break; + case 0x0288: + k = 0x01ae; + break; + case 0x028a: + k = 0x01b1; + break; + case 0x028b: + k = 0x01b2; + break; + case 0x0292: + k = 0x01b7; + break; + default: + k = 0; } + if (k != 0) + return k; } - else if (c < 0x500) + } + else if (c < 0x0400) + { + if (c == 0x03ac) + return 0x0386; + + if ((c & 0xfff0) == 0x03a0 && c >= 0x03ad) + return (c - 0x15); + + if (c >= 0x03b1 && c <= 0x03cb && c != 0x03c2) + return (c - 0x20); + + if (c == 0x03c2) + return 0x03a3; + + if (c >= 0x03cc && c <= 0x03f5) { - if (c >= 0x0450 && c <= 0x045f) - return (c - 0x50); - - if (c >= 0x0430 && c <= 0x044f) - return (c - 0x20); - - if ((c >= 0x0461 && c <= 0x0481) || - (c >= 0x048b && c <= 0x04bf) || - (c >= 0x04d1 && c <= 0x04f5)) - { - if (c & 0x01) - return (c - 1); - return c; - } - - if (c >= 0x04c2 && c <= 0x04ce) + wint_t k; + switch (c) { - if (!(c & 0x01)) - return (c - 1); - return c; + case 0x03cc: + k = 0x038c; + break; + case 0x03cd: + case 0x03ce: + k = c - 0x3f; + break; + case 0x03d0: + k = 0x0392; + break; + case 0x03d1: + k = 0x0398; + break; + case 0x03d5: + k = 0x03a6; + break; + case 0x03d6: + k = 0x03a0; + break; + case 0x03d9: + case 0x03db: + case 0x03dd: + case 0x03df: + case 0x03e1: + case 0x03e3: + case 0x03e5: + case 0x03e7: + case 0x03e9: + case 0x03eb: + case 0x03ed: + case 0x03ef: + k = c - 1; + break; + case 0x03f0: + k = 0x039a; + break; + case 0x03f1: + k = 0x03a1; + break; + case 0x03f2: + k = 0x03a3; + break; + case 0x03f5: + k = 0x0395; + break; + default: + k = 0; } - - if (c == 0x04f9) - return 0x04f8; + if (k != 0) + return k; } - else if (c < 0x1f00) + } + else if (c < 0x500) + { + if (c >= 0x0450 && c <= 0x045f) + return (c - 0x50); + + if (c >= 0x0430 && c <= 0x044f) + return (c - 0x20); + + if ((c >= 0x0461 && c <= 0x0481) || + (c >= 0x048b && c <= 0x04bf) || + (c >= 
0x04d1 && c <= 0x04f5)) { - if ((c >= 0x0501 && c <= 0x050f) || - (c >= 0x1e01 && c <= 0x1e95) || - (c >= 0x1ea1 && c <= 0x1ef9)) - { - if (c & 0x01) - return (c - 1); - return c; - } - - if (c >= 0x0561 && c <= 0x0586) - return (c - 0x30); - - if (c == 0x1e9b) - return 0x1e60; + if (c & 0x01) + return (c - 1); + return c; } - else if (c < 0x2000) + + if (c >= 0x04c2 && c <= 0x04ce) { - - if ((c >= 0x1f00 && c <= 0x1f07) || - (c >= 0x1f10 && c <= 0x1f15) || - (c >= 0x1f20 && c <= 0x1f27) || - (c >= 0x1f30 && c <= 0x1f37) || - (c >= 0x1f40 && c <= 0x1f45) || - (c >= 0x1f60 && c <= 0x1f67) || - (c >= 0x1f80 && c <= 0x1f87) || - (c >= 0x1f90 && c <= 0x1f97) || - (c >= 0x1fa0 && c <= 0x1fa7)) - return (c + 0x08); - - if (c >= 0x1f51 && c <= 0x1f57 && (c & 0x01)) - return (c + 0x08); - - if (c >= 0x1f70 && c <= 0x1ff3) - { - wint_t k; - switch (c) - { - case 0x1fb0: - k = 0x1fb8; - break; - case 0x1fb1: - k = 0x1fb9; - break; - case 0x1f70: - k = 0x1fba; - break; - case 0x1f71: - k = 0x1fbb; - break; - case 0x1fb3: - k = 0x1fbc; - break; - case 0x1fbe: - k = 0x0399; - break; - case 0x1f72: - k = 0x1fc8; - break; - case 0x1f73: - k = 0x1fc9; - break; - case 0x1f74: - k = 0x1fca; - break; - case 0x1f75: - k = 0x1fcb; - break; - case 0x1fd0: - k = 0x1fd8; - break; - case 0x1fd1: - k = 0x1fd9; - break; - case 0x1f76: - k = 0x1fda; - break; - case 0x1f77: - k = 0x1fdb; - break; - case 0x1fe0: - k = 0x1fe8; - break; - case 0x1fe1: - k = 0x1fe9; - break; - case 0x1f7a: - k = 0x1fea; - break; - case 0x1f7b: - k = 0x1feb; - break; - case 0x1fe5: - k = 0x1fec; - break; - case 0x1f78: - k = 0x1ff8; - break; - case 0x1f79: - k = 0x1ff9; - break; - case 0x1f7c: - k = 0x1ffa; - break; - case 0x1f7d: - k = 0x1ffb; - break; - case 0x1ff3: - k = 0x1ffc; - break; - default: - k = 0; - } - if (k != 0) - return k; - } + if (!(c & 0x01)) + return (c - 1); + return c; } - else + + if (c == 0x04f9) + return 0x04f8; + } + else if (c < 0x1f00) + { + if ((c >= 0x0501 && c <= 0x050f) || + (c >= 
0x1e01 && c <= 0x1e95) || + (c >= 0x1ea1 && c <= 0x1ef9)) { - if (c >= 0x2170 && c <= 0x217f) - return (c - 0x10); - - if (c >= 0x24d0 && c <= 0x24e9) - return (c - 0x1a); - - if (c >= 0xff41 && c <= 0xff5a) - return (c - 0x20); - - if (c >= 0x10428 && c <= 0x1044d) - return (c - 0x28); + if (c & 0x01) + return (c - 1); + return c; } - } -#endif /* _MB_CAPABLE */ - + + if (c >= 0x0561 && c <= 0x0586) + return (c - 0x30); + + if (c == 0x1e9b) + return 0x1e60; + } + else if (c < 0x2000) + { + + if ((c >= 0x1f00 && c <= 0x1f07) || + (c >= 0x1f10 && c <= 0x1f15) || + (c >= 0x1f20 && c <= 0x1f27) || + (c >= 0x1f30 && c <= 0x1f37) || + (c >= 0x1f40 && c <= 0x1f45) || + (c >= 0x1f60 && c <= 0x1f67) || + (c >= 0x1f80 && c <= 0x1f87) || + (c >= 0x1f90 && c <= 0x1f97) || + (c >= 0x1fa0 && c <= 0x1fa7)) + return (c + 0x08); + + if (c >= 0x1f51 && c <= 0x1f57 && (c & 0x01)) + return (c + 0x08); + + if (c >= 0x1f70 && c <= 0x1ff3) + { + wint_t k; + switch (c) + { + case 0x1fb0: + k = 0x1fb8; + break; + case 0x1fb1: + k = 0x1fb9; + break; + case 0x1f70: + k = 0x1fba; + break; + case 0x1f71: + k = 0x1fbb; + break; + case 0x1fb3: + k = 0x1fbc; + break; + case 0x1fbe: + k = 0x0399; + break; + case 0x1f72: + k = 0x1fc8; + break; + case 0x1f73: + k = 0x1fc9; + break; + case 0x1f74: + k = 0x1fca; + break; + case 0x1f75: + k = 0x1fcb; + break; + case 0x1fd0: + k = 0x1fd8; + break; + case 0x1fd1: + k = 0x1fd9; + break; + case 0x1f76: + k = 0x1fda; + break; + case 0x1f77: + k = 0x1fdb; + break; + case 0x1fe0: + k = 0x1fe8; + break; + case 0x1fe1: + k = 0x1fe9; + break; + case 0x1f7a: + k = 0x1fea; + break; + case 0x1f7b: + k = 0x1feb; + break; + case 0x1fe5: + k = 0x1fec; + break; + case 0x1f78: + k = 0x1ff8; + break; + case 0x1f79: + k = 0x1ff9; + break; + case 0x1f7c: + k = 0x1ffa; + break; + case 0x1f7d: + k = 0x1ffb; + break; + case 0x1ff3: + k = 0x1ffc; + break; + default: + k = 0; + } + if (k != 0) + return k; + } + } + else + { + if (c >= 0x2170 && c <= 0x217f) + return (c - 
0x10); + + if (c >= 0x24d0 && c <= 0x24e9) + return (c - 0x1a); + + if (c >= 0xff41 && c <= 0xff5a) + return (c - 0x20); + + if (c >= 0x10428 && c <= 0x1044d) + return (c - 0x28); + } + return c; +#else return (c < 0x00ff ? (wint_t)(toupper ((int)c)) : c); +#endif /* _MB_CAPABLE */ } diff --git a/newlib/libc/include/sys/config.h b/newlib/libc/include/sys/config.h index 33e2927..b068e3b 100644 --- a/newlib/libc/include/sys/config.h +++ b/newlib/libc/include/sys/config.h @@ -179,6 +179,7 @@ #if defined(__CYGWIN__) #include <cygwin/config.h> #define __LINUX_ERRNO_EXTENSIONS__ 1 +#define _MB_EXTENDED_CHARSETS_ALL 1 #endif #if defined(__rtems__) @@ -211,4 +212,12 @@ #endif #endif +/* If _MB_EXTENDED_CHARSETS_ALL is set, we want all of the extended + charsets. The extended charsets add a few functions and a couple + of tables of a few K each. */ +#ifdef _MB_EXTENDED_CHARSETS_ALL +#define _MB_EXTENDED_CHARSETS_ISO 1 +#define _MB_EXTENDED_CHARSETS_WINDOWS 1 +#endif + #endif /* __SYS_CONFIG_H__ */ diff --git a/newlib/libc/include/sys/reent.h b/newlib/libc/include/sys/reent.h index 55fb9ff..60eb208 100644 --- a/newlib/libc/include/sys/reent.h +++ b/newlib/libc/include/sys/reent.h @@ -371,8 +371,8 @@ struct _reent int __sdidinit; /* 1 means stdio has been init'd */ - int _current_category; /* used by setlocale */ - _CONST char *_current_locale; + int _current_category; /* unused */ + _CONST char *_current_locale; /* unused */ struct _mprec *_mp; diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c index a4cd30e..d3644eb 100644 --- a/newlib/libc/locale/locale.c +++ b/newlib/libc/locale/locale.c @@ -47,11 +47,18 @@ and <<"C">> values for <[locale]>; strings representing other locales are not honored unless _MB_CAPABLE is defined in which case POSIX locale strings are allowed, plus five extensions supported for backward compatibility with older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>, -<<"C-SJIS">>, or <<"C-ISO-8859-x">> with 1 
<= x <= 15. Even when using -POSIX locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>, -<<"EUCJP">>, <<"SJIS">>, or <<"ISO-8859-x">> with 1 <= x <= 15. (<<"">> is -also accepted; if given, the settings are read from the corresponding -LC_* environment variables and $LANG according to POSIX rules. +<<"C-SJIS">>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with +xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, +1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. Even when using POSIX +locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>, +<<"EUCJP">>, <<"SJIS">>, <<"ISO-8859-x">> with 1 <= x <= 15, or +<<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, +874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. +(<<"">> is also accepted; if given, the settings are read from the +corresponding LC_* environment variables and $LANG according to POSIX rules. + +Under Cygwin, this implementation additionally supports the charsets <<"GBK">>, +<<"CP949">>, and <<"BIG5">>. If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a pointer to the string representing the current locale (always @@ -85,6 +92,9 @@ PORTABILITY ANSI C requires <<setlocale>>, but the only locale required across all implementations is the C locale. +NOTES +There is no ISO-8859-12 codepage. It's also refused by this implementation. + No supporting OS subroutines are required. */ @@ -129,6 +139,11 @@ No supporting OS subroutines are required. 
#include <limits.h> #include <reent.h> #include <stdlib.h> +#include <wchar.h> +#include "../stdlib/local.h" +#ifdef __CYGWIN__ +#include <windows.h> +#endif #define _LC_LAST 7 #define ENCODING_LEN 31 @@ -190,8 +205,8 @@ static const char *__get_locale_env(struct _reent *, int); #endif -static char lc_ctype_charset[ENCODING_LEN + 1] = "ISO-8859-1"; -static char lc_message_charset[ENCODING_LEN + 1] = "ISO-8859-1"; +static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII"; +static char lc_message_charset[ENCODING_LEN + 1] = "ASCII"; char * _DEFUN(_setlocale_r, (p, category, locale), @@ -205,8 +220,6 @@ _DEFUN(_setlocale_r, (p, category, locale), if (strcmp (locale, "POSIX") && strcmp (locale, "C") && strcmp (locale, "")) return NULL; - p->_current_category = category; - p->_current_locale = locale; } return "C"; #else @@ -361,6 +374,11 @@ currentlocale() #endif #ifdef _MB_CAPABLE +#ifdef __CYGWIN__ +extern void *__set_charset_from_codepage (unsigned int, char *charset); +extern void __set_ctype (const char *charset); +#endif /* __CYGWIN__ */ + static char * loadlocale(struct _reent *p, int category) { @@ -382,7 +400,7 @@ loadlocale(struct _reent *p, int category) if (!strcmp (locale, "POSIX")) strcpy (locale, "C"); if (!strcmp (locale, "C")) /* Default "C" locale */ - strcpy (charset, "ISO-8859-1"); + strcpy (charset, "ASCII"); else if (locale[0] == 'C' && locale[1] == '-') /* Old newlib style */ strcpy (charset, locale + 2); else /* POSIX style */ @@ -414,7 +432,11 @@ loadlocale(struct _reent *p, int category) } else if (c[0] == '\0' || c[0] == '@') /* End of string or just a modifier */ +#ifdef __CYGWIN__ + __set_charset_from_codepage (GetACP (), charset); +#else strcpy (charset, "ISO-8859-1"); +#endif else /* Invalid string */ return NULL; @@ -426,42 +448,155 @@ loadlocale(struct _reent *p, int category) if (strcmp (charset, "UTF-8")) return NULL; mbc_max = 6; +#ifdef _MB_CAPABLE + __wctomb = __utf8_wctomb; + __mbtowc = __utf8_mbtowc; +#endif break; case 'J': if 
(strcmp (charset, "JIS")) return NULL; mbc_max = 8; +#ifdef _MB_CAPABLE + __wctomb = __jis_wctomb; + __mbtowc = __jis_mbtowc; +#endif break; case 'E': - if (strcmp (charset, "EUCJP")) + if (strcmp (charset, "EUCJP") && strcmp (charset, "eucJP")) return NULL; + strcpy (charset, "EUCJP"); mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __eucjp_wctomb; + __mbtowc = __eucjp_mbtowc; +#endif break; case 'S': if (strcmp (charset, "SJIS")) return NULL; mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __sjis_wctomb; + __mbtowc = __sjis_mbtowc; +#endif break; case 'I': - default: - /* Must be exactly one of ISO-8859-1, [...] ISO-8859-15. */ + /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for + ISO-8859-12. */ if (strncmp (charset, "ISO-8859-", 9)) return NULL; - val = strtol (charset + 9, &end, 10); - if (val < 1 || val > 15 || *end) + val = _strtol_r (p, charset + 9, &end, 10); + if (val < 1 || val > 16 || val == 12 || *end) return NULL; mbc_max = 1; +#ifdef _MB_CAPABLE +#ifdef _MB_EXTENDED_CHARSETS_ISO + __wctomb = __iso_wctomb; + __mbtowc = __iso_mbtowc; +#else /* !_MB_EXTENDED_CHARSETS_ISO */ + __wctomb = __ascii_wctomb; + __mbtowc = __ascii_mbtowc; +#endif /* _MB_EXTENDED_CHARSETS_ISO */ +#endif + break; + case 'C': + if (charset[1] != 'P') + return NULL; + val = _strtol_r (p, charset + 2, &end, 10); + if (*end) + return NULL; + switch (val) + { + case 437: + case 720: + case 737: + case 775: + case 850: + case 852: + case 855: + case 857: + case 858: + case 862: + case 866: + case 874: + case 1125: + case 1250: + case 1251: + case 1252: + case 1253: + case 1254: + case 1255: + case 1256: + case 1257: + case 1258: + mbc_max = 1; +#ifdef _MB_CAPABLE +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS + __wctomb = __cp_wctomb; + __mbtowc = __cp_mbtowc; +#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ + __wctomb = __ascii_wctomb; + __mbtowc = __ascii_mbtowc; +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ +#endif + break; +#ifdef __CYGWIN__ + case 949: + mbc_max = 2; +#ifdef 
_MB_CAPABLE + __wctomb = __kr_wctomb; + __mbtowc = __kr_mbtowc; +#endif + break; +#endif + default: + return NULL; + } + break; + case 'A': + if (strcmp (charset, "ASCII")) + return NULL; + mbc_max = 1; +#ifdef _MB_CAPABLE + __wctomb = __ascii_wctomb; + __mbtowc = __ascii_mbtowc; +#endif break; +#ifdef __CYGWIN__ + case 'G': + if (strcmp (charset, "GBK")) + return NULL; + mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __gbk_wctomb; + __mbtowc = __gbk_mbtowc; +#endif + break; + case 'B': + if (strcmp (charset, "BIG5") && strcmp (charset, "Big5")) + return NULL; + strcpy (charset, "BIG5"); + mbc_max = 2; +#ifdef _MB_CAPABLE + __wctomb = __big5_wctomb; + __mbtowc = __big5_mbtowc; +#endif + break; +#endif /* __CYGWIN__ */ + default: + return NULL; } if (category == LC_CTYPE) { strcpy (lc_ctype_charset, charset); __mb_cur_max = mbc_max; +#ifdef __CYGWIN__ + __set_ctype (charset); +#endif } else if (category == LC_MESSAGES) strcpy (lc_message_charset, charset); - p->_current_category = category; - p->_current_locale = locale; return strcpy(current_categories[category], new_categories[category]); } diff --git a/newlib/libc/stdlib/Makefile.am b/newlib/libc/stdlib/Makefile.am index 74df73a..a2e87d4 100644 --- a/newlib/libc/stdlib/Makefile.am +++ b/newlib/libc/stdlib/Makefile.am @@ -48,6 +48,7 @@ GENERAL_SOURCES = \ rand_r.c \ realloc.c \ reallocf.c \ + sb_charsets.c \ strtod.c \ strtol.c \ strtoul.c \ diff --git a/newlib/libc/stdlib/Makefile.in b/newlib/libc/stdlib/Makefile.in index 4734317..a719054 100644 --- a/newlib/libc/stdlib/Makefile.in +++ b/newlib/libc/stdlib/Makefile.in @@ -38,6 +38,7 @@ PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ +LIBOBJDIR = DIST_COMMON = $(srcdir)/../../Makefile.shared $(srcdir)/Makefile.in \ $(srcdir)/Makefile.am subdir = stdlib @@ -87,12 +88,12 @@ am__objects_1 = lib_a-__adjust.$(OBJEXT) lib_a-__atexit.$(OBJEXT) \ lib_a-mlock.$(OBJEXT) lib_a-mprec.$(OBJEXT) \ lib_a-mstats.$(OBJEXT) lib_a-rand.$(OBJEXT) \ 
lib_a-rand_r.$(OBJEXT) lib_a-realloc.$(OBJEXT) \ - lib_a-reallocf.$(OBJEXT) lib_a-strtod.$(OBJEXT) \ - lib_a-strtol.$(OBJEXT) lib_a-strtoul.$(OBJEXT) \ - lib_a-wcstod.$(OBJEXT) lib_a-wcstol.$(OBJEXT) \ - lib_a-wcstoul.$(OBJEXT) lib_a-wcstombs.$(OBJEXT) \ - lib_a-wcstombs_r.$(OBJEXT) lib_a-wctomb.$(OBJEXT) \ - lib_a-wctomb_r.$(OBJEXT) + lib_a-reallocf.$(OBJEXT) lib_a-sb_charsets.$(OBJEXT) \ + lib_a-strtod.$(OBJEXT) lib_a-strtol.$(OBJEXT) \ + lib_a-strtoul.$(OBJEXT) lib_a-wcstod.$(OBJEXT) \ + lib_a-wcstol.$(OBJEXT) lib_a-wcstoul.$(OBJEXT) \ + lib_a-wcstombs.$(OBJEXT) lib_a-wcstombs_r.$(OBJEXT) \ + lib_a-wctomb.$(OBJEXT) lib_a-wctomb_r.$(OBJEXT) am__objects_2 = lib_a-cxa_atexit.$(OBJEXT) \ lib_a-cxa_finalize.$(OBJEXT) lib_a-drand48.$(OBJEXT) \ lib_a-ecvtbuf.$(OBJEXT) lib_a-efgcvt.$(OBJEXT) \ @@ -137,9 +138,10 @@ am__objects_7 = __adjust.lo __atexit.lo __call_atexit.lo __exp10.lo \ gdtoa-gethex.lo gdtoa-hexnan.lo getenv.lo getenv_r.lo labs.lo \ ldiv.lo ldtoa.lo malloc.lo mblen.lo mblen_r.lo mbstowcs.lo \ mbstowcs_r.lo mbtowc.lo mbtowc_r.lo mlock.lo mprec.lo \ - mstats.lo rand.lo rand_r.lo realloc.lo reallocf.lo strtod.lo \ - strtol.lo strtoul.lo wcstod.lo wcstol.lo wcstoul.lo \ - wcstombs.lo wcstombs_r.lo wctomb.lo wctomb_r.lo + mstats.lo rand.lo rand_r.lo realloc.lo reallocf.lo \ + sb_charsets.lo strtod.lo strtol.lo strtoul.lo wcstod.lo \ + wcstol.lo wcstoul.lo wcstombs.lo wcstombs_r.lo wctomb.lo \ + wctomb_r.lo am__objects_8 = cxa_atexit.lo cxa_finalize.lo drand48.lo ecvtbuf.lo \ efgcvt.lo erand48.lo jrand48.lo lcong48.lo lrand48.lo \ mrand48.lo msize.lo mtrim.lo nrand48.lo rand48.lo seed48.lo \ @@ -287,20 +289,8 @@ STRIP = @STRIP@ USE_LIBTOOL_FALSE = @USE_LIBTOOL_FALSE@ USE_LIBTOOL_TRUE = @USE_LIBTOOL_TRUE@ VERSION = @VERSION@ -ac_ct_AR = @ac_ct_AR@ -ac_ct_AS = @ac_ct_AS@ ac_ct_CC = @ac_ct_CC@ -ac_ct_DLLTOOL = @ac_ct_DLLTOOL@ -ac_ct_DSYMUTIL = @ac_ct_DSYMUTIL@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_ct_LIPO = @ac_ct_LIPO@ -ac_ct_NMEDIT = @ac_ct_NMEDIT@ -ac_ct_OBJDUMP = 
@ac_ct_OBJDUMP@ -ac_ct_OTOOL = @ac_ct_OTOOL@ -ac_ct_OTOOL64 = @ac_ct_OTOOL64@ -ac_ct_RANLIB = @ac_ct_RANLIB@ -ac_ct_READELF = @ac_ct_READELF@ -ac_ct_STRIP = @ac_ct_STRIP@ aext = @aext@ am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ @@ -316,6 +306,9 @@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ exec_prefix = @exec_prefix@ extra_dir = @extra_dir@ host = @host@ @@ -323,12 +316,14 @@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ +htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ libm_machine_dir = @libm_machine_dir@ +localedir = @localedir@ localstatedir = @localstatedir@ lpfx = @lpfx@ lt_ECHO = @lt_ECHO@ @@ -338,8 +333,10 @@ mkdir_p = @mkdir_p@ newlib_basedir = @newlib_basedir@ oext = @oext@ oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ +psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ subdirs = @subdirs@ @@ -392,6 +389,7 @@ GENERAL_SOURCES = \ rand_r.c \ realloc.c \ reallocf.c \ + sb_charsets.c \ strtod.c \ strtol.c \ strtoul.c \ @@ -872,6 +870,12 @@ lib_a-reallocf.o: reallocf.c lib_a-reallocf.obj: reallocf.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-reallocf.obj `if test -f 'reallocf.c'; then $(CYGPATH_W) 'reallocf.c'; else $(CYGPATH_W) '$(srcdir)/reallocf.c'; fi` +lib_a-sb_charsets.o: sb_charsets.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sb_charsets.o `test -f 'sb_charsets.c' || echo '$(srcdir)/'`sb_charsets.c + +lib_a-sb_charsets.obj: sb_charsets.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) 
$(CFLAGS) -c -o lib_a-sb_charsets.obj `if test -f 'sb_charsets.c'; then $(CYGPATH_W) 'sb_charsets.c'; else $(CYGPATH_W) '$(srcdir)/sb_charsets.c'; fi` + lib_a-strtod.o: strtod.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strtod.o `test -f 'strtod.c' || echo '$(srcdir)/'`strtod.c @@ -1233,7 +1237,7 @@ clean-libtool: -rm -rf .libs _libs distclean-libtool: - -rm -f libtool + -rm -f libtool config.lt uninstall-info-am: ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) diff --git a/newlib/libc/stdlib/gdtoa-gethex.c b/newlib/libc/stdlib/gdtoa-gethex.c index 92f30fc..80f5695 100644 --- a/newlib/libc/stdlib/gdtoa-gethex.c +++ b/newlib/libc/stdlib/gdtoa-gethex.c @@ -35,10 +35,7 @@ THIS SOFTWARE. #include "mprec.h" #include "gdtoa.h" #include "gd_qnan.h" - -#ifdef USE_LOCALE #include "locale.h" -#endif unsigned char hexdig[256]; @@ -151,11 +148,10 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, sign), int esign, havedig, irv, k, n, nbits, up, zret; __ULong L, lostbits, *x; Long e, e1; -#ifdef USE_LOCALE - unsigned char decimalpoint = *localeconv()->decimal_point; -#else -#define decimalpoint '.' 
-#endif + unsigned char *decimalpoint = (unsigned char *) + localeconv()->decimal_point; + size_t decp_len = strlen ((const char *) decimalpoint); + unsigned char decp_end = decimalpoint[decp_len - 1]; if (!hexdig['0']) hexdig_init(); @@ -170,9 +166,9 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, sign), e = 0; if (!hexdig[*s]) { zret = 1; - if (*s != decimalpoint) + if (strcmp ((const char *) s, (const char *) decimalpoint) != 0) goto pcheck; - decpt = ++s; + decpt = (s += decp_len); if (!hexdig[*s]) goto pcheck; while(*s == '0') @@ -184,8 +180,9 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, sign), } while(hexdig[*s]) s++; - if (*s == decimalpoint && !decpt) { - decpt = ++s; + if (strcmp ((const char *) s, (const char *) decimalpoint) == 0 + && !decpt) { + decpt = (s += decp_len); while(hexdig[*s]) s++; } @@ -226,8 +223,12 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, sign), n = 0; L = 0; while(s1 > s0) { - if (*--s1 == decimalpoint) + if (*--s1 == decp_end && s1 - decp_len + 1 >= s0 + && strcmp ((const char *) s1 - decp_len + 1, + (const char *) decimalpoint) == 0) { + s1 -= decp_len - 1; /* Note the --s1 above! 
*/ continue; + } if (n == 32) { *x++ = L; L = 0; diff --git a/newlib/libc/stdlib/local.h b/newlib/libc/stdlib/local.h index a274f20..459d98b 100644 --- a/newlib/libc/stdlib/local.h +++ b/newlib/libc/stdlib/local.h @@ -5,4 +5,61 @@ char * _EXFUN(_gcvt,(struct _reent *, double , int , char *, char, int)); +char *__locale_charset (); + +#ifndef __mbstate_t_defined +#include <wchar.h> +#endif + +int (*__wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +int __ascii_wctomb (struct _reent *, char *, wchar_t, const char *, + mbstate_t *); +#ifdef _MB_CAPABLE +int __utf8_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +int __sjis_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +int __eucjp_wctomb (struct _reent *, char *, wchar_t, const char *, + mbstate_t *); +int __jis_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +int __iso_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +int __cp_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +#ifdef __CYGWIN__ +int __gbk_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +int __kr_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +int __big5_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +#endif +#endif + +int (*__mbtowc) (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +int __ascii_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +#ifdef _MB_CAPABLE +int __utf8_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +int __sjis_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +int __eucjp_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +int __jis_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +int __iso_mbtowc (struct 
_reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +int __cp_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +#ifdef __CYGWIN__ +int __gbk_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +int __kr_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +int __big5_mbtowc (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *); +#endif +#endif + +wchar_t __iso_8859_conv[14][0x60]; +int __iso_8859_index (const char *); + +wchar_t __cp_conv[12][0x80]; +int __cp_index (const char *); + #endif diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c index d3856e0..78bde4b 100644 --- a/newlib/libc/stdlib/mbtowc_r.c +++ b/newlib/libc/stdlib/mbtowc_r.c @@ -5,10 +5,53 @@ #include <wchar.h> #include <string.h> #include <errno.h> +#include "local.h" -#ifdef _MB_CAPABLE -extern char *__locale_charset (); +int (*__mbtowc) (struct _reent *, wchar_t *, const char *, size_t, + const char *, mbstate_t *) + = __ascii_mbtowc; + +int +_DEFUN (_mbtowc_r, (r, pwc, s, n, state), + struct _reent *r _AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + mbstate_t *state) +{ + return __mbtowc (r, pwc, s, n, __locale_charset (), state); +} +int +_DEFUN (__ascii_mbtowc, (r, pwc, s, n, charset, state), + struct _reent *r _AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + const char *charset _AND + mbstate_t *state) +{ + wchar_t dummy; + unsigned char *t = (unsigned char *)s; + + if (pwc == NULL) + pwc = &dummy; + + if (s == NULL) + return 0; + + if (n == 0) + return -2; + + *pwc = (wchar_t)*t; + + if (*t == '\0') + return 0; + + return 1; +} + +#ifdef _MB_CAPABLE typedef enum { ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE; typedef enum { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR, @@ -43,17 +86,18 @@ static JIS_ACTION 
JIS_action_table[JIS_S_NUM][JIS_C_NUM] = { /* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR }, /* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR }, }; -#endif /* _MB_CAPABLE */ /* we override the mbstate_t __count field for more complex encodings and use it store a state value */ #define __state __count +#ifdef _MB_EXTENDED_CHARSETS_ISO int -_DEFUN (_mbtowc_r, (r, pwc, s, n, state), - struct _reent *r _AND - wchar_t *pwc _AND - const char *s _AND - size_t n _AND +_DEFUN (__iso_mbtowc, (r, pwc, s, n, charset, state), + struct _reent *r _AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + const char *charset _AND mbstate_t *state) { wchar_t dummy; @@ -62,358 +106,394 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), if (pwc == NULL) pwc = &dummy; - if (s != NULL && n == 0) + if (s == NULL) + return 0; + + if (n == 0) return -2; -#ifdef _MB_CAPABLE - if (strlen (__locale_charset ()) <= 1) - { /* fall-through */ } - else if (!strcmp (__locale_charset (), "UTF-8")) + if (*t >= 0xa0) { - int ch; - int i = 0; - - if (s == NULL) - return 0; /* UTF-8 character encodings are not state-dependent */ - - if (state->__count == 4) + int iso_idx = __iso_8859_index (charset + 9); + if (iso_idx >= 0) { - /* Create the second half of the surrogate pair. For a description - see the comment below. 
*/ - wint_t tmp = (wchar_t)((state->__value.__wchb[0] & 0x07) << 18) - | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 12) - | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 6) - | (wchar_t)(state->__value.__wchb[3] & 0x3f); - state->__count = 0; - *pwc = 0xdc00 | ((tmp - 0x10000) & 0x3ff); - return 2; - } - if (state->__count == 0) - ch = t[i++]; - else - { - if (n < (size_t)-1) - ++n; - ch = state->__value.__wchb[0]; - } - - if (ch == '\0') - { - *pwc = 0; - state->__count = 0; - return 0; /* s points to the null character */ - } - - if (ch >= 0x0 && ch <= 0x7f) - { - /* single-byte sequence */ - state->__count = 0; - *pwc = ch; - return 1; - } - else if (ch >= 0xc0 && ch <= 0xdf) - { - /* two-byte sequence */ - state->__value.__wchb[0] = ch; - state->__count = 1; - if (n < 2) - return -2; - ch = t[i++]; - if (ch < 0x80 || ch > 0xbf) + *pwc = __iso_8859_conv[iso_idx][*t - 0xa0]; + if (*pwc == 0) /* Invalid character */ { r->_errno = EILSEQ; return -1; } - if (state->__value.__wchb[0] < 0xc2) - { - /* overlong UTF-8 sequence */ - r->_errno = EILSEQ; - return -1; - } - state->__count = 0; - *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6) - | (wchar_t)(ch & 0x3f); - return i; + return 1; } - else if (ch >= 0xe0 && ch <= 0xef) + } + + *pwc = (wchar_t) *t; + + if (*t == '\0') + return 0; + + return 1; +} +#endif /* _MB_EXTENDED_CHARSETS_ISO */ + +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS +int +_DEFUN (__cp_mbtowc, (r, pwc, s, n, charset, state), + struct _reent *r _AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + const char *charset _AND + mbstate_t *state) +{ + wchar_t dummy; + unsigned char *t = (unsigned char *)s; + + if (pwc == NULL) + pwc = &dummy; + + if (s == NULL) + return 0; + + if (n == 0) + return -2; + + if (*t >= 0x80) + { + int cp_idx = __cp_index (charset + 2); + if (cp_idx >= 0) { - /* three-byte sequence */ - wchar_t tmp; - state->__value.__wchb[0] = ch; - if (state->__count == 0) - state->__count = 1; - else if (n < (size_t)-1) - 
++n; - if (n < 2) - return -2; - ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; - if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0) - { - /* overlong UTF-8 sequence */ - r->_errno = EILSEQ; - return -1; - } - if (ch < 0x80 || ch > 0xbf) - { - r->_errno = EILSEQ; - return -1; - } - state->__value.__wchb[1] = ch; - state->__count = 2; - if (n < 3) - return -2; - ch = t[i++]; - if (ch < 0x80 || ch > 0xbf) + *pwc = __cp_conv[cp_idx][*t - 0x80]; + if (*pwc == 0) /* Invalid character */ { r->_errno = EILSEQ; return -1; } - state->__count = 0; - tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12) - | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6) - | (wchar_t)(ch & 0x3f); - - if (tmp >= 0xd800 && tmp <= 0xdfff) - { - r->_errno = EILSEQ; - return -1; - } - *pwc = tmp; - return i; + return 1; } - else if (ch >= 0xf0 && ch <= 0xf7) + } + + *pwc = (wchar_t)*t; + + if (*t == '\0') + return 0; + + return 1; +} +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ + +int +_DEFUN (__utf8_mbtowc, (r, pwc, s, n, charset, state), + struct _reent *r _AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + const char *charset _AND + mbstate_t *state) +{ + wchar_t dummy; + unsigned char *t = (unsigned char *)s; + int ch; + int i = 0; + + if (pwc == NULL) + pwc = &dummy; + + if (s == NULL) + return 0; + + if (n == 0) + return -2; + + if (state->__count == 4) + { + /* Create the second half of the surrogate pair. For a description + see the comment below. 
*/ + wint_t tmp = (wchar_t)((state->__value.__wchb[0] & 0x07) << 18) + | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 12) + | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 6) + | (wchar_t)(state->__value.__wchb[3] & 0x3f); + state->__count = 0; + *pwc = 0xdc00 | ((tmp - 0x10000) & 0x3ff); + return 2; + } + if (state->__count == 0) + ch = t[i++]; + else + { + if (n < (size_t)-1) + ++n; + ch = state->__value.__wchb[0]; + } + + if (ch == '\0') + { + *pwc = 0; + state->__count = 0; + return 0; /* s points to the null character */ + } + + if (ch >= 0x0 && ch <= 0x7f) + { + /* single-byte sequence */ + state->__count = 0; + *pwc = ch; + return 1; + } + if (ch >= 0xc0 && ch <= 0xdf) + { + /* two-byte sequence */ + state->__value.__wchb[0] = ch; + state->__count = 1; + if (n < 2) + return -2; + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) { - /* four-byte sequence */ - wint_t tmp; - state->__value.__wchb[0] = ch; - if (state->__count == 0) - state->__count = 1; - else if (n < (size_t)-1) - ++n; - if (n < 2) - return -2; - ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; - if (state->__value.__wchb[0] == 0xf0 && ch < 0x90) - { - /* overlong UTF-8 sequence */ - r->_errno = EILSEQ; - return -1; - } - if (ch < 0x80 || ch > 0xbf) - { - r->_errno = EILSEQ; - return -1; - } - state->__value.__wchb[1] = ch; - if (state->__count == 1) - state->__count = 2; - else if (n < (size_t)-1) - ++n; - if (n < 3) - return -2; - ch = (state->__count == 2) ? 
t[i++] : state->__value.__wchb[2]; - if (ch < 0x80 || ch > 0xbf) - { - r->_errno = EILSEQ; - return -1; - } - state->__value.__wchb[2] = ch; - state->__count = 3; - if (n < 4) - return -2; - ch = t[i++]; - if (ch < 0x80 || ch > 0xbf) - { - r->_errno = EILSEQ; - return -1; - } - tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18) - | (wint_t)((state->__value.__wchb[1] & 0x3f) << 12) - | (wint_t)((state->__value.__wchb[2] & 0x3f) << 6) - | (wint_t)(ch & 0x3f); - if (tmp > 0xffff && sizeof(wchar_t) == 2) - { - /* On systems which have wchar_t being UTF-16 values, the value - doesn't fit into a single wchar_t in this case. So what we - do here is to store the state with a special value of __count - and return the first half of a surrogate pair. As return - value we choose to return the half of the actual UTF-8 char. - The second half is returned in case we recognize the special - __count value above. */ - state->__value.__wchb[3] = ch; - state->__count = 4; - *pwc = 0xd800 | (((tmp - 0x10000) >> 10) & 0x3ff); - return 2; - } - *pwc = tmp; - state->__count = 0; - return i; + r->_errno = EILSEQ; + return -1; } - else + if (state->__value.__wchb[0] < 0xc2) { + /* overlong UTF-8 sequence */ r->_errno = EILSEQ; return -1; } - } - else if (!strcmp (__locale_charset (), "SJIS")) + state->__count = 0; + *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6) + | (wchar_t)(ch & 0x3f); + return i; + } + if (ch >= 0xe0 && ch <= 0xef) { - int ch; - int i = 0; - if (s == NULL) - return 0; /* not state-dependent */ - ch = t[i++]; + /* three-byte sequence */ + wchar_t tmp; + state->__value.__wchb[0] = ch; if (state->__count == 0) + state->__count = 1; + else if (n < (size_t)-1) + ++n; + if (n < 2) + return -2; + ch = (state->__count == 1) ? 
t[i++] : state->__value.__wchb[1]; + if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0) { - if (_issjis1 (ch)) - { - state->__value.__wchb[0] = ch; - state->__count = 1; - if (n <= 1) - return -2; - ch = t[i++]; - } + /* overlong UTF-8 sequence */ + r->_errno = EILSEQ; + return -1; } - if (state->__count == 1) + if (ch < 0x80 || ch > 0xbf) { - if (_issjis2 (ch)) - { - *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch; - state->__count = 0; - return i; - } - else - { - r->_errno = EILSEQ; - return -1; - } + r->_errno = EILSEQ; + return -1; + } + state->__value.__wchb[1] = ch; + state->__count = 2; + if (n < 3) + return -2; + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) + { + r->_errno = EILSEQ; + return -1; } + state->__count = 0; + tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12) + | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6) + | (wchar_t)(ch & 0x3f); + + if (tmp >= 0xd800 && tmp <= 0xdfff) + { + r->_errno = EILSEQ; + return -1; + } + *pwc = tmp; + return i; } - else if (!strcmp (__locale_charset (), "EUCJP")) + if (ch >= 0xf0 && ch <= 0xf7) { - int ch; - int i = 0; - if (s == NULL) - return 0; /* not state-dependent */ - ch = t[i++]; + /* four-byte sequence */ + wint_t tmp; + state->__value.__wchb[0] = ch; if (state->__count == 0) + state->__count = 1; + else if (n < (size_t)-1) + ++n; + if (n < 2) + return -2; + ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; + if (state->__value.__wchb[0] == 0xf0 && ch < 0x90) { - if (_iseucjp (ch)) - { - state->__value.__wchb[0] = ch; - state->__count = 1; - if (n <= 1) - return -2; - ch = t[i++]; - } + /* overlong UTF-8 sequence */ + r->_errno = EILSEQ; + return -1; + } + if (ch < 0x80 || ch > 0xbf) + { + r->_errno = EILSEQ; + return -1; } + state->__value.__wchb[1] = ch; if (state->__count == 1) + state->__count = 2; + else if (n < (size_t)-1) + ++n; + if (n < 3) + return -2; + ch = (state->__count == 2) ? 
t[i++] : state->__value.__wchb[2]; + if (ch < 0x80 || ch > 0xbf) { - if (_iseucjp (ch)) - { - *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch; - state->__count = 0; - return i; - } - else - { - r->_errno = EILSEQ; - return -1; - } + r->_errno = EILSEQ; + return -1; + } + state->__value.__wchb[2] = ch; + state->__count = 3; + if (n < 4) + return -2; + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) + { + r->_errno = EILSEQ; + return -1; } + tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18) + | (wint_t)((state->__value.__wchb[1] & 0x3f) << 12) + | (wint_t)((state->__value.__wchb[2] & 0x3f) << 6) + | (wint_t)(ch & 0x3f); + if (tmp > 0xffff && sizeof(wchar_t) == 2) + { + /* On systems which have wchar_t being UTF-16 values, the value + doesn't fit into a single wchar_t in this case. So what we + do here is to store the state with a special value of __count + and return the first half of a surrogate pair. As return + value we choose to return the half of the actual UTF-8 char. + The second half is returned in case we recognize the special + __count value above. 
*/ + state->__value.__wchb[3] = ch; + state->__count = 4; + *pwc = 0xd800 | (((tmp - 0x10000) >> 10) & 0x3ff); + return 2; + } + *pwc = tmp; + state->__count = 0; + return i; } - else if (!strcmp (__locale_charset (), "JIS")) - { - JIS_STATE curr_state; - JIS_ACTION action; - JIS_CHAR_TYPE ch; - unsigned char *ptr; - unsigned int i; - int curr_ch; - - if (s == NULL) - { - state->__state = ASCII; - return 1; /* state-dependent */ - } - - curr_state = state->__state; - ptr = t; - - for (i = 0; i < n; ++i) - { - curr_ch = t[i]; - switch (curr_ch) - { - case ESC_CHAR: - ch = ESCAPE; - break; - case '$': - ch = DOLLAR; - break; - case '@': - ch = AT; - break; - case '(': - ch = BRACKET; - break; - case 'B': - ch = B; - break; - case 'J': - ch = J; - break; - case '\0': - ch = NUL; - break; - default: - if (_isjis (curr_ch)) - ch = JIS_CHAR; - else - ch = OTHER; - } - action = JIS_action_table[curr_state][ch]; - curr_state = JIS_state_table[curr_state][ch]; - - switch (action) - { - case NOOP: - break; - case EMPTY: - state->__state = ASCII; - *pwc = (wchar_t)0; - return 0; - case COPY_A: - state->__state = ASCII; - *pwc = (wchar_t)*ptr; - return (i + 1); - case COPY_J1: - state->__value.__wchb[0] = t[i]; - break; - case COPY_J2: - state->__state = JIS; - *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]); - return (i + 1); - case MAKE_A: - ptr = (unsigned char *)(t + i + 1); - break; - case ERROR: - default: - r->_errno = EILSEQ; - return -1; - } + r->_errno = EILSEQ; + return -1; +} - } +/* Cygwin defines its own doublebyte charset conversion functions + because the underlying OS requires wchar_t == UTF-16. 
*/ +#ifndef __CYGWIN__ +int +_DEFUN (__sjis_mbtowc, (r, pwc, s, n, charset, state), + struct _reent *r _AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + const char *charset _AND + mbstate_t *state) +{ + wchar_t dummy; + unsigned char *t = (unsigned char *)s; + int ch; + int i = 0; - state->__state = curr_state; - return -2; /* n < bytes needed */ - } -#endif /* _MB_CAPABLE */ + if (pwc == NULL) + pwc = &dummy; - /* otherwise this must be the "C" locale or unknown locale */ if (s == NULL) return 0; /* not state-dependent */ + if (n == 0) + return -2; + + ch = t[i++]; + if (state->__count == 0) + { + if (_issjis1 (ch)) + { + state->__value.__wchb[0] = ch; + state->__count = 1; + if (n <= 1) + return -2; + ch = t[i++]; + } + } + if (state->__count == 1) + { + if (_issjis2 (ch)) + { + *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch; + state->__count = 0; + return i; + } + else + { + r->_errno = EILSEQ; + return -1; + } + } + + *pwc = (wchar_t)*t; + + if (*t == '\0') + return 0; + + return 1; +} + +int +_DEFUN (__eucjp_mbtowc, (r, pwc, s, n, charset, state), + struct _reent *r _AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + const char *charset _AND + mbstate_t *state) +{ + wchar_t dummy; + unsigned char *t = (unsigned char *)s; + int ch; + int i = 0; + + if (pwc == NULL) + pwc = &dummy; + + if (s == NULL) + return 0; + + if (n == 0) + return -2; + + ch = t[i++]; + if (state->__count == 0) + { + if (_iseucjp (ch)) + { + state->__value.__wchb[0] = ch; + state->__count = 1; + if (n <= 1) + return -2; + ch = t[i++]; + } + } + if (state->__count == 1) + { + if (_iseucjp (ch)) + { + *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch; + state->__count = 0; + return i; + } + else + { + r->_errno = EILSEQ; + return -1; + } + } + *pwc = (wchar_t)*t; if (*t == '\0') @@ -421,3 +501,108 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), return 1; } + +int +_DEFUN (__jis_mbtowc, (r, pwc, s, n, charset, state), + struct _reent *r 
_AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + const char *charset _AND + mbstate_t *state) +{ + wchar_t dummy; + unsigned char *t = (unsigned char *)s; + JIS_STATE curr_state; + JIS_ACTION action; + JIS_CHAR_TYPE ch; + unsigned char *ptr; + unsigned int i; + int curr_ch; + + if (pwc == NULL) + pwc = &dummy; + + if (s == NULL) + { + state->__state = ASCII; + return 1; /* state-dependent */ + } + + if (n == 0) + return -2; + + curr_state = state->__state; + ptr = t; + + for (i = 0; i < n; ++i) + { + curr_ch = t[i]; + switch (curr_ch) + { + case ESC_CHAR: + ch = ESCAPE; + break; + case '$': + ch = DOLLAR; + break; + case '@': + ch = AT; + break; + case '(': + ch = BRACKET; + break; + case 'B': + ch = B; + break; + case 'J': + ch = J; + break; + case '\0': + ch = NUL; + break; + default: + if (_isjis (curr_ch)) + ch = JIS_CHAR; + else + ch = OTHER; + } + + action = JIS_action_table[curr_state][ch]; + curr_state = JIS_state_table[curr_state][ch]; + + switch (action) + { + case NOOP: + break; + case EMPTY: + state->__state = ASCII; + *pwc = (wchar_t)0; + return 0; + case COPY_A: + state->__state = ASCII; + *pwc = (wchar_t)*ptr; + return (i + 1); + case COPY_J1: + state->__value.__wchb[0] = t[i]; + break; + case COPY_J2: + state->__state = JIS; + *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]); + return (i + 1); + case MAKE_A: + ptr = (unsigned char *)(t + i + 1); + break; + case ERROR: + default: + r->_errno = EILSEQ; + return -1; + } + + } + + state->__state = curr_state; + return -2; /* n < bytes needed */ +} +#endif /* !__CYGWIN__*/ +#endif /* _MB_CAPABLE */ diff --git a/newlib/libc/stdlib/sb_charsets.c b/newlib/libc/stdlib/sb_charsets.c new file mode 100644 index 0000000..3fd0158 --- /dev/null +++ b/newlib/libc/stdlib/sb_charsets.c @@ -0,0 +1,697 @@ +#include <newlib.h> +#include <wchar.h> + +#ifdef _MB_CAPABLE +extern char *__locale_charset (); + +#ifdef _MB_EXTENDED_CHARSETS_ISO +/* Tables for the ISO-8859-x to UTF conversion. 
The first index into the + table is a value computed from the value x (function __iso_8859_index), + the second index is the value of the incoming character - 0xa0. + Values < 0xa0 don't have to be converted anyway. */ +wchar_t __iso_8859_conv[14][0x60] = { + /* ISO-8859-2 */ + { 0xa0, 0x104, 0x2d8, 0x141, 0xa4, 0x13d, 0x15a, 0xa7, + 0xa8, 0x160, 0x15e, 0x164, 0x179, 0xad, 0x17d, 0x17b, + 0xb0, 0x105, 0x2db, 0x142, 0xb4, 0x13e, 0x15b, 0x2c7, + 0xb8, 0x161, 0x15f, 0x165, 0x17a, 0x2dd, 0x17e, 0x17c, + 0x154, 0xc1, 0xc2, 0x102, 0xc4, 0x139, 0x106, 0xc7, + 0x10c, 0xc9, 0x118, 0xcb, 0x11a, 0xcd, 0xce, 0x10e, + 0x110, 0x143, 0x147, 0xd3, 0xd4, 0x150, 0xd6, 0xd7, + 0x158, 0x16e, 0xda, 0x170, 0xdc, 0xdd, 0x162, 0xdf, + 0x155, 0xe1, 0xe2, 0x103, 0xe4, 0x13a, 0x107, 0xe7, + 0x10d, 0xe9, 0x119, 0xeb, 0x11b, 0xed, 0xee, 0x10f, + 0x111, 0x144, 0x148, 0xf3, 0xf4, 0x151, 0xf6, 0xf7, + 0x159, 0x16f, 0xfa, 0x171, 0xfc, 0xfd, 0x163, 0x2d9 }, + /* ISO-8859-3 */ + { 0xa0, 0x126, 0x2d8, 0xa3, 0xa4, 0x0, 0x124, 0xa7, + 0xa8, 0x130, 0x15e, 0x11e, 0x134, 0xad, 0x0, 0x17b, + 0xb0, 0x127, 0xb2, 0xb3, 0xb4, 0xb5, 0x125, 0xb7, + 0xb8, 0x131, 0x15f, 0x11f, 0x135, 0xbd, 0x0, 0x17c, + 0xc0, 0xc1, 0xc2, 0x0, 0xc4, 0x10a, 0x108, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0x0, 0xd1, 0xd2, 0xd3, 0xd4, 0x120, 0xd6, 0xd7, + 0x11c, 0xd9, 0xda, 0xdb, 0xdc, 0x16c, 0x15c, 0xdf, + 0xe0, 0xe1, 0xe2, 0x0, 0xe4, 0x10b, 0x109, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0x0, 0xf1, 0xf2, 0xf3, 0xf4, 0x121, 0xf6, 0xf7, + 0x11d, 0xf9, 0xfa, 0xfb, 0xfc, 0x16d, 0x15d, 0x2d9 }, + /* ISO-8859-4 */ + { 0xa0, 0x104, 0x138, 0x156, 0xa4, 0x128, 0x13b, 0xa7, + 0xa8, 0x160, 0x112, 0x122, 0x166, 0xad, 0x17d, 0xaf, + 0xb0, 0x105, 0x2db, 0x157, 0xb4, 0x129, 0x13c, 0x2c7, + 0xb8, 0x161, 0x113, 0x123, 0x167, 0x14a, 0x17e, 0x14b, + 0x100, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0x12e, + 0x10c, 0xc9, 0x118, 0xcb, 0x116, 0xcd, 0xce, 0x12a, + 0x110, 0x145, 0x14c, 0x136, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 
0x172, 0xda, 0xdb, 0xdc, 0x168, 0x16a, 0xdf, + 0x101, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0x12f, + 0x10d, 0xe9, 0x119, 0xeb, 0x117, 0xed, 0xee, 0x12b, + 0x111, 0x146, 0x14d, 0x137, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0x173, 0xfa, 0xfb, 0xfc, 0x169, 0x16b, 0x2d9 }, + /* ISO-8859-5 */ + { 0xa0, 0x401, 0x402, 0x403, 0x404, 0x405, 0x406, 0x407, + 0x408, 0x409, 0x40a, 0x40b, 0x40c, 0xad, 0x40e, 0x40f, + 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, + 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, + 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, + 0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f, + 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, + 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, + 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, + 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f, + 0x2116, 0x451, 0x452, 0x453, 0x454, 0x455, 0x456, 0x457, + 0x458, 0x459, 0x45a, 0x45b, 0x45c, 0xa7, 0x45e, 0x45f }, + /* ISO-8859-6 */ + { 0xa0, 0x0, 0x0, 0x0, 0xa4, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x60c, 0xad, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x61b, 0x0, 0x0, 0x0, 0x61f, + 0x0, 0x621, 0x622, 0x623, 0x624, 0x625, 0x626, 0x627, + 0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f, + 0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0x636, 0x637, + 0x638, 0x639, 0x63a, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x640, 0x641, 0x642, 0x643, 0x644, 0x645, 0x646, 0x647, + 0x648, 0x649, 0x64a, 0x64b, 0x64c, 0x64d, 0x64e, 0x64f, + 0x650, 0x651, 0x652, 0x64b, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }, + /* ISO-8859-7 */ + { 0xa0, 0x2018, 0x2019, 0xa3, 0x20ac, 0x20af, 0xa6, 0xa7, + 0xa8, 0xa9, 0x37a, 0xab, 0xac, 0xad, 0x0, 0x2015, + 0xb0, 0xb1, 0xb2, 0xb3, 0x384, 0x385, 0x386, 0xb7, + 0x388, 0x389, 0x38a, 0xbb, 0x38c, 0xbd, 0x38e, 0x38f, + 0x390, 0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397, + 0x398, 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f, + 0x3a0, 0x3a1, 0x0, 
0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, + 0x3a8, 0x3a9, 0x3aa, 0x3ab, 0x3ac, 0x3ad, 0x3ae, 0x3af, + 0x3b0, 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, + 0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf, + 0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7, + 0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xff }, + /* ISO-8859-8 */ + { 0xa0, 0x0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2017, + 0x5d0, 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7, + 0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df, + 0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7, + 0x5e8, 0x5e9, 0x5ea, 0x0, 0x0, 0x200e, 0x200f, 0x200e }, + /* ISO-8859-9 */ + { 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0x11e, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x130, 0x15e, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0x11f, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x131, 0x15f, 0xff }, + /* ISO-8859-10 */ + { 0xa0, 0x104, 0x112, 0x122, 0x12a, 0x128, 0x136, 0xa7, + 0x13b, 0x110, 0x160, 0x166, 0x17d, 0xad, 0x16a, 0x14a, + 0xb0, 0x105, 0x113, 0x123, 0x12b, 0x129, 0x137, 0xb7, + 0x13c, 0x111, 0x161, 0x167, 0x17e, 0x2015, 0x16b, 0x14b, + 0x100, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0x12e, + 0x10c, 0xc9, 0x118, 0xcb, 0x116, 0xcd, 0xce, 0xcf, + 0xd0, 0x145, 0x14c, 0xd3, 0xd4, 0xd5, 0xd6, 0x168, + 0xd8, 
0x172, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0x101, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0x12f, + 0x10d, 0xe9, 0x119, 0xeb, 0x117, 0xed, 0xee, 0xef, + 0xf0, 0x146, 0x14d, 0xf3, 0xf4, 0xf5, 0xf6, 0x169, + 0xf8, 0x173, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x138 }, + /* ISO-8859-11 */ + { 0xa0, 0xe01, 0xe02, 0xe03, 0xe04, 0xe05, 0xe06, 0xe07, + 0xe08, 0xe09, 0xe0a, 0xe0b, 0xe0c, 0xe0d, 0xe0e, 0xe0f, + 0xe10, 0xe11, 0xe12, 0xe13, 0xe14, 0xe15, 0xe16, 0xe17, + 0xe18, 0xe19, 0xe1a, 0xe1b, 0xe1c, 0xe1d, 0xe1e, 0xe1f, + 0xe20, 0xe21, 0xe22, 0xe23, 0xe24, 0xe25, 0xe26, 0xe27, + 0xe28, 0xe29, 0xe2a, 0xe2b, 0xe2c, 0xe2d, 0xe2e, 0xe2f, + 0xe30, 0xe31, 0xe32, 0xe33, 0xe34, 0xe35, 0xe36, 0xe37, + 0xe38, 0xe39, 0xe3a, 0x0, 0x0, 0x0, 0x0, 0xe3f, + 0xe40, 0xe41, 0xe42, 0xe43, 0xe44, 0xe45, 0xe46, 0xe47, + 0xe48, 0xe49, 0xe4a, 0xe4b, 0xe4c, 0xe4d, 0xe4e, 0xe4f, + 0xe50, 0xe51, 0xe52, 0xe53, 0xe54, 0xe55, 0xe56, 0xe57, + 0xe58, 0xe59, 0xe5a, 0xe5b, 0xe31, 0xe34, 0xe47, 0xff }, + /* ISO-8859-12 doesn't exist. The below code decrements the index + into the table by one for ISO numbers > 12. 
*/ + /* ISO-8859-13 */ + { 0xa0, 0x201d, 0xa2, 0xa3, 0xa4, 0x201e, 0xa6, 0xa7, + 0xd8, 0xa9, 0x156, 0xab, 0xac, 0xad, 0xae, 0xc6, + 0xb0, 0xb1, 0xb2, 0xb3, 0x201c, 0xb5, 0xb6, 0xb7, + 0xf8, 0xb9, 0x157, 0xbb, 0xbc, 0xbd, 0xbe, 0xe6, + 0x104, 0x12e, 0x100, 0x106, 0xc4, 0xc5, 0x118, 0x112, + 0x10c, 0xc9, 0x179, 0x116, 0x122, 0x136, 0x12a, 0x13b, + 0x160, 0x143, 0x145, 0xd3, 0x14c, 0xd5, 0xd6, 0xd7, + 0x172, 0x141, 0x15a, 0x16a, 0xdc, 0x17b, 0x17d, 0xdf, + 0x105, 0x12f, 0x101, 0x107, 0xe4, 0xe5, 0x119, 0x113, + 0x10d, 0xe9, 0x17a, 0x117, 0x123, 0x137, 0x12b, 0x13c, + 0x161, 0x144, 0x146, 0xf3, 0x14d, 0xf5, 0xf6, 0xf7, + 0x173, 0x142, 0x15b, 0x16b, 0xfc, 0x17c, 0x17e, 0x2019 }, + /* ISO-8859-14 */ + { 0xa0, 0x1e02, 0x1e03, 0xa3, 0x10a, 0x10b, 0x1e0a, 0xa7, + 0x1e80, 0xa9, 0x1e82, 0x1e0b, 0x1ef2, 0xad, 0xae, 0x178, + 0x1e1e, 0x1e1f, 0x120, 0x121, 0x1e40, 0x1e41, 0xb6, 0x1e56, + 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0x174, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0x1e6a, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0x176, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0x175, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x1e6b, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0x177, 0xff }, + /* ISO-8859-15 */ + { 0xa0, 0xa1, 0xa2, 0xa3, 0x20ac, 0xa5, 0x160, 0xa7, + 0x161, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0x17d, 0xb5, 0xb6, 0xb7, + 0x17e, 0xb9, 0xba, 0xbb, 0x152, 0x153, 0x178, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }, + /* 
ISO-8859-16 */ + { 0xa0, 0x104, 0x105, 0x141, 0x20ac, 0x201e, 0x160, 0xa7, + 0x161, 0xa9, 0x218, 0xab, 0x179, 0xad, 0x17a, 0x17b, + 0xb0, 0xb1, 0x10c, 0x142, 0x17d, 0x201d, 0xb6, 0xb7, + 0x17e, 0x10d, 0x219, 0xbb, 0x152, 0x153, 0x178, 0x17c, + 0xc0, 0xc1, 0xc2, 0x102, 0xc4, 0x106, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0x110, 0x143, 0xd2, 0xd3, 0xd4, 0x150, 0xd6, 0x15a, + 0x170, 0xd9, 0xda, 0xdb, 0xdc, 0x118, 0x21a, 0xdf, + 0xe0, 0xe1, 0xe2, 0x103, 0xe4, 0x107, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0x111, 0x144, 0xf2, 0xf3, 0xf4, 0x151, 0xf6, 0x15b, + 0x171, 0xf9, 0xfa, 0xfb, 0xfc, 0x119, 0x21b, 0xff } +}; +#endif /* _MB_EXTENDED_CHARSETS_ISO */ + +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS +/* Tables for the Windows default singlebyte ANSI codepage conversion. + The first index into the table is a value computed from the codepage + value (function __cp_index), the second index is the value of the + incoming character - 0x80. + Values < 0x80 don't have to be converted anyway. 
*/ +wchar_t __cp_conv[22][0x80] = { + /* CP437 */ + { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, + 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, + 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, + 0xff, 0xd6, 0xdc, 0xa2, 0xa3, 0xa5, 0x20a7, 0x192, + 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, + 0xbf, 0x2310, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, + 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0x3b1, 0xdf, 0x393, 0x3c0, 0x3a3, 0x3c3, 0xb5, 0x3c4, + 0x3a6, 0x398, 0x3a9, 0x3b4, 0x221e, 0x3c6, 0x3b5, 0x2229, + 0x2261, 0xb1, 0x2265, 0x2264, 0x2320, 0x2321, 0xf7, 0x2248, + 0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0, 0xa0 }, + /* CP720 */ + { 0x0, 0x0, 0xe9, 0xe2, 0x0, 0xe0, 0x0, 0xe7, + 0xea, 0xeb, 0xe8, 0xef, 0xee, 0x0, 0x0, 0x0, + 0x0, 0x651, 0x652, 0xf4, 0xa4, 0x640, 0xfb, 0xf9, + 0x621, 0x622, 0x623, 0x624, 0xa3, 0x625, 0x626, 0x627, + 0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f, + 0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, + 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0x636, 0x637, 0x638, 0x639, 0x63a, 0x641, 0xb5, 0x642, + 0x643, 0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, + 0x2261, 0x64b, 0x64c, 0x64d, 0x64e, 0x64f, 0x650, 0x2248, + 0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0, 0xa0 }, + /* CP737 */ + { 0x391, 0x392, 0x393, 0x394, 
0x395, 0x396, 0x397, 0x398, + 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f, 0x3a0, + 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9, + 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, 0x3b8, + 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf, 0x3c0, + 0x3c1, 0x3c3, 0x3c2, 0x3c4, 0x3c5, 0x3c6, 0x3c7, 0x3c8, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, + 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0x3c9, 0x3ac, 0x3ad, 0x3ae, 0x3ca, 0x3af, 0x3cc, 0x3cd, + 0x3cb, 0x3ce, 0x386, 0x388, 0x389, 0x38a, 0x38c, 0x38e, + 0x38f, 0xb1, 0x2265, 0x2264, 0x3aa, 0x3ab, 0xf7, 0x2248, + 0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0, 0xa0 }, + /* CP775 */ + { 0x106, 0xfc, 0xe9, 0x101, 0xe4, 0x123, 0xe5, 0x107, + 0x142, 0x113, 0x156, 0x157, 0x12b, 0x179, 0xc4, 0xc5, + 0xc9, 0xe6, 0xc6, 0x14d, 0xf6, 0x122, 0xa2, 0x15a, + 0x15b, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0xa4, + 0x100, 0x12a, 0xf3, 0x17b, 0x17c, 0x17a, 0x201d, 0xa6, + 0xa9, 0xae, 0xac, 0xbd, 0xbc, 0x141, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x104, 0x10c, 0x118, + 0x116, 0x2563, 0x2551, 0x2557, 0x255d, 0x12e, 0x160, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x172, 0x16a, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x17d, + 0x105, 0x10d, 0x119, 0x117, 0x12f, 0x161, 0x173, 0x16b, + 0x17e, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0xd3, 0xdf, 0x14c, 0x143, 0xf5, 0xd5, 0xb5, 0x144, + 0x136, 0x137, 0x13b, 0x13c, 0x146, 0x112, 0x145, 0x2019, + 0xad, 0xb1, 0x201c, 0xbe, 0xb6, 0xa7, 0xf7, 0x201e, + 0xb0, 0x2219, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0 }, + /* CP850 */ + { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, + 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 
0xc5, + 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, + 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x192, + 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, + 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0xc1, 0xc2, 0xc0, + 0xa9, 0x2563, 0x2551, 0x2557, 0x255d, 0xa2, 0xa5, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0xe3, 0xc3, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4, + 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x131, 0xcd, 0xce, + 0xcf, 0x2518, 0x250c, 0x2588, 0x2584, 0xa6, 0xcc, 0x2580, + 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, + 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0xb4, + 0xad, 0xb1, 0x2017, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, + 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0 }, + /* CP852 */ + { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0x16f, 0x107, 0xe7, + 0x142, 0xeb, 0x150, 0x151, 0xee, 0x179, 0xc4, 0x106, + 0xc9, 0x139, 0x13a, 0xf4, 0xf6, 0x13d, 0x13e, 0x15a, + 0x15b, 0xd6, 0xdc, 0x164, 0x165, 0x141, 0xd7, 0x10d, + 0xe1, 0xed, 0xf3, 0xfa, 0x104, 0x105, 0x17d, 0x17e, + 0x118, 0x119, 0xac, 0x17a, 0x10c, 0x15f, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0xc1, 0xc2, 0x11a, + 0x15e, 0x2563, 0x2551, 0x2557, 0x255d, 0x17b, 0x17c, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x102, 0x103, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4, + 0x111, 0x110, 0x10e, 0xcb, 0x10f, 0x147, 0xcd, 0xce, + 0x11b, 0x2518, 0x250c, 0x2588, 0x2584, 0x162, 0x16e, 0x2580, + 0xd3, 0xdf, 0xd4, 0x143, 0x144, 0x148, 0x160, 0x161, + 0x154, 0xda, 0x155, 0x170, 0xfd, 0xdd, 0x163, 0xb4, + 0xad, 0x2dd, 0x2db, 0x2c7, 0x2d8, 0xa7, 0xf7, 0xb8, + 0xb0, 0xa8, 0x2d9, 0x171, 0x158, 0x159, 0x25a0, 0xa0 }, + /* CP855 */ + { 0x452, 0x402, 0x453, 0x403, 0x451, 0x401, 0x454, 0x404, + 0x455, 0x405, 0x456, 0x406, 0x457, 0x407, 0x458, 0x408, + 0x459, 0x409, 0x45a, 0x40a, 0x45b, 0x40b, 0x45c, 0x40c, + 0x45e, 0x40e, 0x45f, 0x40f, 0x44e, 0x42e, 0x44a, 0x42a, + 0x430, 0x410, 0x431, 0x411, 0x446, 0x426, 
0x434, 0x414, + 0x435, 0x415, 0x444, 0x424, 0x433, 0x413, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x445, 0x425, 0x438, + 0x418, 0x2563, 0x2551, 0x2557, 0x255d, 0x439, 0x419, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x43a, 0x41a, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4, + 0x43b, 0x41b, 0x43c, 0x41c, 0x43d, 0x41d, 0x43e, 0x41e, + 0x43f, 0x2518, 0x250c, 0x2588, 0x2584, 0x41f, 0x44f, 0x2580, + 0x42f, 0x440, 0x420, 0x441, 0x421, 0x442, 0x422, 0x443, + 0x423, 0x436, 0x416, 0x432, 0x412, 0x44c, 0x42c, 0x2116, + 0xad, 0x44b, 0x42b, 0x437, 0x417, 0x448, 0x428, 0x44d, + 0x42d, 0x449, 0x429, 0x447, 0x427, 0xa7, 0x25a0, 0xa0 }, + /* CP857 */ + { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, + 0xea, 0xeb, 0xe8, 0xef, 0xee, 0x131, 0xc4, 0xc5, + 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, + 0x130, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0x15e, 0x15f, + 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0x11e, 0x11f, + 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0xc1, 0xc2, 0xc0, + 0xa9, 0x2563, 0x2551, 0x2557, 0x255d, 0xa2, 0xa5, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0xe3, 0xc3, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4, + 0xba, 0xaa, 0xca, 0xcb, 0xc8, 0x0, 0xcd, 0xce, + 0xcf, 0x2518, 0x250c, 0x2588, 0x2584, 0xa6, 0xcc, 0x2580, + 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0x0, + 0xd7, 0xda, 0xdb, 0xd9, 0xec, 0xff, 0xaf, 0xb4, + 0xad, 0xb1, 0x0, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, + 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0 }, + /* CP858 */ + { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, + 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, + 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, + 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x192, + 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, + 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0xc1, 0xc2, 0xc0, + 0xa9, 0x2563, 0x2551, 0x2557, 0x255d, 0xa2, 0xa5, 0x2510, + 
0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0xe3, 0xc3, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4, + 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x20ac, 0xcd, 0xce, + 0xcf, 0x2518, 0x250c, 0x2588, 0x2584, 0xa6, 0xcc, 0x2580, + 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, + 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0xb4, + 0xad, 0xb1, 0x2017, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, + 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0 }, + /* CP862 */ + { 0x5d0, 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7, + 0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df, + 0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7, + 0x5e8, 0x5e9, 0x5ea, 0xa2, 0xa3, 0xa5, 0x20a7, 0x192, + 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, + 0xbf, 0x2310, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, + 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0x3b1, 0xdf, 0x393, 0x3c0, 0x3a3, 0x3c3, 0xb5, 0x3c4, + 0x3a6, 0x398, 0x3a9, 0x3b4, 0x221e, 0x3c6, 0x3b5, 0x2229, + 0x2261, 0xb1, 0x2265, 0x2264, 0x2320, 0x2321, 0xf7, 0x2248, + 0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0, 0xa0 }, + /* CP866 */ + { 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, + 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, + 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, + 0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f, + 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, + 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, + 0x255a, 
0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, + 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, + 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f, + 0x401, 0x451, 0x404, 0x454, 0x407, 0x457, 0x40e, 0x45e, + 0xb0, 0x2219, 0xb7, 0x221a, 0x2116, 0xa4, 0x25a0, 0xa0 }, + /* CP874 */ + { 0x20ac, 0x0, 0x0, 0x0, 0x0, 0x2026, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xa0, 0xe01, 0xe02, 0xe03, 0xe04, 0xe05, 0xe06, 0xe07, + 0xe08, 0xe09, 0xe0a, 0xe0b, 0xe0c, 0xe0d, 0xe0e, 0xe0f, + 0xe10, 0xe11, 0xe12, 0xe13, 0xe14, 0xe15, 0xe16, 0xe17, + 0xe18, 0xe19, 0xe1a, 0xe1b, 0xe1c, 0xe1d, 0xe1e, 0xe1f, + 0xe20, 0xe21, 0xe22, 0xe23, 0xe24, 0xe25, 0xe26, 0xe27, + 0xe28, 0xe29, 0xe2a, 0xe2b, 0xe2c, 0xe2d, 0xe2e, 0xe2f, + 0xe30, 0xe31, 0xe32, 0xe33, 0xe34, 0xe35, 0xe36, 0xe37, + 0xe38, 0xe39, 0xe3a, 0x0, 0x0, 0x0, 0x0, 0xe3f, + 0xe40, 0xe41, 0xe42, 0xe43, 0xe44, 0xe45, 0xe46, 0xe47, + 0xe48, 0xe49, 0xe4a, 0xe4b, 0xe4c, 0xe4d, 0xe4e, 0xe4f, + 0xe50, 0xe51, 0xe52, 0xe53, 0xe54, 0xe55, 0xe56, 0xe57, + 0xe58, 0xe59, 0xe5a, 0xe5b, 0xfc, 0xfd, 0xfe, 0xff }, + /* CP1125 */ + { 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, + 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, + 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, + 0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f, + 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, + 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 
0x2553, 0x256b, + 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, + 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f, + 0x401, 0x451, 0x490, 0x491, 0x404, 0x454, 0x406, 0x456, + 0x407, 0x457, 0xb7, 0x221a, 0x2116, 0xa4, 0x25a0, 0xa0 }, + /* CP1250 */ + { 0x20ac, 0x0, 0x201a, 0x0, 0x201e, 0x2026, 0x2020, 0x2021, + 0x0, 0x2030, 0x160, 0x2039, 0x15a, 0x164, 0x17d, 0x179, + 0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x0, 0x2122, 0x161, 0x203a, 0x15b, 0x165, 0x17e, 0x17a, + 0xa0, 0x2c7, 0x2d8, 0x141, 0xa4, 0x104, 0xa6, 0xa7, + 0xa8, 0xa9, 0x15e, 0xab, 0xac, 0xad, 0xae, 0x17b, + 0xb0, 0xb1, 0x2db, 0x142, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0x105, 0x15f, 0xbb, 0x13d, 0x2dd, 0x13e, 0x17c, + 0x154, 0xc1, 0xc2, 0x102, 0xc4, 0x139, 0x106, 0xc7, + 0x10c, 0xc9, 0x118, 0xcb, 0x11a, 0xcd, 0xce, 0x10e, + 0x110, 0x143, 0x147, 0xd3, 0xd4, 0x150, 0xd6, 0xd7, + 0x158, 0x16e, 0xda, 0x170, 0xdc, 0xdd, 0x162, 0xdf, + 0x155, 0xe1, 0xe2, 0x103, 0xe4, 0x13a, 0x107, 0xe7, + 0x10d, 0xe9, 0x119, 0xeb, 0x11b, 0xed, 0xee, 0x10f, + 0x111, 0x144, 0x148, 0xf3, 0xf4, 0x151, 0xf6, 0xf7, + 0x159, 0x16f, 0xfa, 0x171, 0xfc, 0xfd, 0x163, 0x2d9 }, + /* CP1251 */ + { 0x402, 0x403, 0x201a, 0x453, 0x201e, 0x2026, 0x2020, 0x2021, + 0x20ac, 0x2030, 0x409, 0x2039, 0x40a, 0x40c, 0x40b, 0x40f, + 0x452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x0, 0x2122, 0x459, 0x203a, 0x45a, 0x45c, 0x45b, 0x45f, + 0xa0, 0x40e, 0x45e, 0x408, 0xa4, 0x490, 0xa6, 0xa7, + 0x401, 0xa9, 0x404, 0xab, 0xac, 0xad, 0xae, 0x407, + 0xb0, 0xb1, 0x406, 0x456, 0x491, 0xb5, 0xb6, 0xb7, + 0x451, 0x2116, 0x454, 0xbb, 0x458, 0x405, 0x455, 0x457, + 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, + 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, + 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, + 0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f, + 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, + 
0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, + 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, + 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f }, + /* CP1252 */ + { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x0, 0x17d, 0x0, + 0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x0, 0x17e, 0x178, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }, + /* CP1253 */ + { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, + 0x0, 0x2030, 0x0, 0x2039, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x0, 0x2122, 0x0, 0x203a, 0x0, 0x0, 0x0, 0x0, + 0xa0, 0x385, 0x386, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0x0, 0xab, 0xac, 0xad, 0xae, 0x2015, + 0xb0, 0xb1, 0xb2, 0xb3, 0x384, 0xb5, 0xb6, 0xb7, + 0x388, 0x389, 0x38a, 0xbb, 0x38c, 0xbd, 0x38e, 0x38f, + 0x390, 0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397, + 0x398, 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f, + 0x3a0, 0x3a1, 0x0, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, + 0x3a8, 0x3a9, 0x3aa, 0x3ab, 0x3ac, 0x3ad, 0x3ae, 0x3af, + 0x3b0, 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, + 0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf, + 0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7, + 0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xff }, + /* CP1254 */ + { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 
0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x0, 0x0, 0x0, + 0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x0, 0x0, 0x178, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0x11e, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x130, 0x15e, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0x11f, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x131, 0x15f, 0xff }, + /* CP1255 */ + { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x0, 0x2039, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x2dc, 0x2122, 0x0, 0x203a, 0x0, 0x0, 0x0, 0x0, + 0xa0, 0xa1, 0xa2, 0xa3, 0x20aa, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0x5b0, 0x5b1, 0x5b2, 0x5b3, 0x5b4, 0x5b5, 0x5b6, 0x5b7, + 0x5b8, 0x5b9, 0x0, 0x5bb, 0x5bc, 0x5bd, 0x5be, 0x5bf, + 0x5c0, 0x5c1, 0x5c2, 0x5c3, 0x5f0, 0x5f1, 0x5f2, 0x5f3, + 0x5f4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x5d0, 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7, + 0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df, + 0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7, + 0x5e8, 0x5e9, 0x5ea, 0x0, 0x0, 0x200e, 0x200f, 0xff }, + /* CP1256 */ + { 0x20ac, 0x67e, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x679, 0x2039, 0x152, 0x686, 0x698, 0x688, + 0x6af, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x6a9, 0x2122, 0x691, 0x203a, 0x153, 0x200c, 0x200d, 0x6ba, + 0xa0, 0x60c, 0xa2, 0xa3, 0xa4, 
0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0x6be, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0x61b, 0xbb, 0xbc, 0xbd, 0xbe, 0x61f, + 0x6c1, 0x621, 0x622, 0x623, 0x624, 0x625, 0x626, 0x627, + 0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f, + 0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0x636, 0xd7, + 0x637, 0x638, 0x639, 0x63a, 0x640, 0x641, 0x642, 0x643, + 0xe0, 0x644, 0xe2, 0x645, 0x646, 0x647, 0x648, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x649, 0x64a, 0xee, 0xef, + 0x64b, 0x64c, 0x64d, 0x64e, 0xf4, 0x64f, 0x650, 0xf7, + 0x651, 0xf9, 0x652, 0xfb, 0xfc, 0x200e, 0x200f, 0x6d2 }, + /* CP1257 */ + { 0x20ac, 0x0, 0x201a, 0x0, 0x201e, 0x2026, 0x2020, 0x2021, + 0x0, 0x2030, 0x0, 0x2039, 0x0, 0xa8, 0x2c7, 0xb8, + 0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x0, 0x2122, 0x0, 0x203a, 0x0, 0xaf, 0x2db, 0x0, + 0xa0, 0x0, 0xa2, 0xa3, 0xa4, 0x0, 0xa6, 0xa7, + 0xd8, 0xa9, 0x156, 0xab, 0xac, 0xad, 0xae, 0xc6, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xf8, 0xb9, 0x157, 0xbb, 0xbc, 0xbd, 0xbe, 0xe6, + 0x104, 0x12e, 0x100, 0x106, 0xc4, 0xc5, 0x118, 0x112, + 0x10c, 0xc9, 0x179, 0x116, 0x122, 0x136, 0x12a, 0x13b, + 0x160, 0x143, 0x145, 0xd3, 0x14c, 0xd5, 0xd6, 0xd7, + 0x172, 0x141, 0x15a, 0x16a, 0xdc, 0x17b, 0x17d, 0xdf, + 0x105, 0x12f, 0x101, 0x107, 0xe4, 0xe5, 0x119, 0x113, + 0x10d, 0xe9, 0x17a, 0x117, 0x123, 0x137, 0x12b, 0x13c, + 0x161, 0x144, 0x146, 0xf3, 0x14d, 0xf5, 0xf6, 0xf7, + 0x173, 0x142, 0x15b, 0x16b, 0xfc, 0x17c, 0x17e, 0x2d9 }, + /* CP1258 */ + { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, + 0x2c6, 0x2030, 0x0, 0x2039, 0x152, 0x0, 0x0, 0x0, + 0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x2dc, 0x2122, 0x0, 0x203a, 0x153, 0x0, 0x0, 0x178, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0x102, 0xc4, 
0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0x300, 0xcd, 0xce, 0xcf, + 0x110, 0xd1, 0x309, 0xd3, 0xd4, 0x1a0, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x1af, 0x303, 0xdf, + 0xe0, 0xe1, 0xe2, 0x103, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x301, 0xed, 0xee, 0xef, + 0x111, 0xf1, 0x323, 0xf3, 0xf4, 0x1a1, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x1b0, 0x20ab, 0xff } +}; +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ + +/* Handle one to five decimal digits. Return -1 in any other case. */ +static int +__micro_atoi (const char *s) +{ + int ret = 0; + + if (!*s) + return -1; + while (*s) + { + if (*s < '0' || *s > '9' || ret >= 10000) + return -1; + ret = 10 * ret + (*s++ - '0'); + } + return ret; +} + +#ifdef _MB_EXTENDED_CHARSETS_ISO +int +__iso_8859_index (const char *charset_ext) +{ + int iso_idx = __micro_atoi (charset_ext); + if (iso_idx >= 2 && iso_idx <= 16) + { + iso_idx -= 2; + if (iso_idx > 10) + --iso_idx; + return iso_idx; + } + return -1; +} +#endif /* _MB_EXTENDED_CHARSETS_ISO */ + +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS +int +__cp_index (const char *charset_ext) +{ + int cp_idx = __micro_atoi (charset_ext); + switch (cp_idx) + { + case 437: + cp_idx = 0; + break; + case 720: + cp_idx = 1; + break; + case 737: + cp_idx = 2; + break; + case 775: + cp_idx = 3; + break; + case 850: + cp_idx = 4; + break; + case 852: + cp_idx = 5; + break; + case 855: + cp_idx = 6; + break; + case 857: + cp_idx = 7; + break; + case 858: + cp_idx = 8; + break; + case 862: + cp_idx = 9; + break; + case 866: + cp_idx = 10; + break; + case 874: + cp_idx = 11; + break; + case 1125: + cp_idx = 12; + break; + case 1250: + cp_idx = 13; + break; + case 1251: + cp_idx = 14; + break; + case 1252: + cp_idx = 15; + break; + case 1253: + cp_idx = 16; + break; + case 1254: + cp_idx = 17; + break; + case 1255: + cp_idx = 18; + break; + case 1256: + cp_idx = 19; + break; + case 1257: + cp_idx = 20; + break; + case 1258: + cp_idx = 21; + break; + default: + cp_idx = -1; + break; + } 
+ return cp_idx; +} +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ +#endif /* _MB_CAPABLE */ diff --git a/newlib/libc/stdlib/strtod.c b/newlib/libc/stdlib/strtod.c index fb4caf8..aca40a6 100644 --- a/newlib/libc/stdlib/strtod.c +++ b/newlib/libc/stdlib/strtod.c @@ -122,9 +122,7 @@ THIS SOFTWARE. /* #include <fenv.h> */ /* #endif */ -#ifdef USE_LOCALE #include "locale.h" -#endif #ifdef IEEE_Arith #ifndef NO_IEEE_Scale @@ -307,14 +305,10 @@ _DEFUN (_strtod_r, (ptr, s00, se), else if (nd < 16) z = 10*z + c - '0'; nd0 = nd; -#ifdef USE_LOCALE - if (c == *localeconv()->decimal_point) -#else - if (c == '.') -#endif + if (strcmp (s, localeconv()->decimal_point) == 0) { decpt = 1; - c = *++s; + c = *(s += strlen (localeconv()->decimal_point)); if (!nd) { for(; c == '0'; c = *++s) nz++; diff --git a/newlib/libc/stdlib/wcstod.c b/newlib/libc/stdlib/wcstod.c index 11fb922..ca1f0b5 100644 --- a/newlib/libc/stdlib/wcstod.c +++ b/newlib/libc/stdlib/wcstod.c @@ -116,8 +116,10 @@ Supporting OS subroutines required: <<close>>, <<fstat>>, <<isatty>>, #include <_ansi.h> #include <errno.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> #include <wctype.h> +#include <locale.h> #include <math.h> double @@ -167,9 +169,25 @@ _DEFUN (_wcstod_r, (ptr, nptr, endptr), * where it ended, count multibyte characters to find the * corresponding position in the wide char string. */ - if (endptr != NULL) - /* XXX Assume each wide char is one byte. */ + if (endptr != NULL) { + /* The only valid multibyte char in a float converted by + strtod/wcstod is the radix char. What we do here is, + figure out if the radix char was in the valid leading + float sequence in the incoming string. If so, the + multibyte float string is strlen(radix char) - 1 bytes + longer than the incoming wide char string has characters. + To fix endptr, reposition end as if the radix char was + just one byte long. 
The resulting difference (end - buf) + is then equivalent to the number of valid wide characters + in the input string. */ + len = strlen (localeconv ()->decimal_point); + if (len > 1) { + char *d = strstr (buf, localeconv ()->decimal_point); + if (d && d < end) + end -= len - 1; + } *endptr = (wchar_t *)nptr + (end - buf); + } _free_r(ptr, buf); diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c index 8d6d3fc..64210f2 100644 --- a/newlib/libc/stdlib/wctomb_r.c +++ b/newlib/libc/stdlib/wctomb_r.c @@ -4,11 +4,11 @@ #include <wchar.h> #include <locale.h> #include "mbctype.h" +#include "local.h" -extern char *__locale_charset (); - -/* for some conversions, we use the __count field as a place to store a state value */ -#define __state __count +int (*__wctomb) (struct _reent *, char *, wchar_t, const char *charset, + mbstate_t *) + = __ascii_wctomb; int _DEFUN (_wctomb_r, (r, s, wchar, state), @@ -17,196 +17,287 @@ _DEFUN (_wctomb_r, (r, s, wchar, state), wchar_t _wchar _AND mbstate_t *state) { + return __wctomb (r, s, _wchar, __locale_charset (), state); +} + +int +_DEFUN (__ascii_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ /* Avoids compiler warnings about comparisons that are always false due to limited range when sizeof(wchar_t) is 2 but sizeof(wint_t) is 4, as is the case on cygwin. 
*/ wint_t wchar = _wchar; - if (strlen (__locale_charset ()) <= 1) - { /* fall-through */ } - else if (!strcmp (__locale_charset (), "UTF-8")) + if (s == NULL) + return 0; + + if ((size_t)wchar >= 0x100) { - if (s == NULL) - return 0; /* UTF-8 encoding is not state-dependent */ + r->_errno = EILSEQ; + return -1; + } + + *s = (char) wchar; + return 1; +} - if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff)) +#ifdef _MB_CAPABLE +/* for some conversions, we use the __count field as a place to store a state value */ +#define __state __count + +int +_DEFUN (__utf8_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + + if (s == NULL) + return 0; /* UTF-8 encoding is not state-dependent */ + + if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff)) + { + /* At this point only the second half of a surrogate pair is valid. */ + r->_errno = EILSEQ; + return -1; + } + if (wchar <= 0x7f) + { + *s = wchar; + return 1; + } + if (wchar >= 0x80 && wchar <= 0x7ff) + { + *s++ = 0xc0 | ((wchar & 0x7c0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 2; + } + if (wchar >= 0x800 && wchar <= 0xffff) + { + if (wchar >= 0xd800 && wchar <= 0xdfff) { - /* At this point only the second half of a surrogate pair is valid. 
*/ - r->_errno = EILSEQ; - return -1; - } - if (wchar <= 0x7f) - { - *s = wchar; - return 1; - } - else if (wchar >= 0x80 && wchar <= 0x7ff) - { - *s++ = 0xc0 | ((wchar & 0x7c0) >> 6); - *s = 0x80 | (wchar & 0x3f); - return 2; - } - else if (wchar >= 0x800 && wchar <= 0xffff) - { - if (wchar >= 0xd800 && wchar <= 0xdfff) + wint_t tmp; + /* UTF-16 surrogates -- must not occur in normal UCS-4 data */ + if (sizeof (wchar_t) != 2) + { + r->_errno = EILSEQ; + return -1; + } + if (wchar >= 0xdc00) { - wint_t tmp; - /* UTF-16 surrogates -- must not occur in normal UCS-4 data */ - if (sizeof (wchar_t) != 2) + /* Second half of a surrogate pair. It's not valid if + we don't have already read a first half of a surrogate + before. */ + if (state->__count != -4) { r->_errno = EILSEQ; return -1; } - if (wchar >= 0xdc00) - { - /* Second half of a surrogate pair. It's not valid if - we don't have already read a first half of a surrogate - before. */ - if (state->__count != -4) - { - r->_errno = EILSEQ; - return -1; - } - /* If it's valid, reconstruct the full Unicode value and - return the trailing three bytes of the UTF-8 char. */ - tmp = (state->__value.__wchb[0] << 16) - | (state->__value.__wchb[1] << 8) - | (wchar & 0x3ff); - state->__count = 0; - *s++ = 0x80 | ((tmp & 0x3f000) >> 12); - *s++ = 0x80 | ((tmp & 0xfc0) >> 6); - *s = 0x80 | (tmp & 0x3f); - return 3; - } - /* First half of a surrogate pair. Store the state and return - the first byte of the UTF-8 char. */ - tmp = ((wchar & 0x3ff) << 10) + 0x10000; - state->__value.__wchb[0] = (tmp >> 16) & 0xff; - state->__value.__wchb[1] = (tmp >> 8) & 0xff; - state->__count = -4; - *s = (0xf0 | ((tmp & 0x1c0000) >> 18)); - return 1; + /* If it's valid, reconstruct the full Unicode value and + return the trailing three bytes of the UTF-8 char. 
*/ + tmp = (state->__value.__wchb[0] << 16) + | (state->__value.__wchb[1] << 8) + | (wchar & 0x3ff); + state->__count = 0; + *s++ = 0x80 | ((tmp & 0x3f000) >> 12); + *s++ = 0x80 | ((tmp & 0xfc0) >> 6); + *s = 0x80 | (tmp & 0x3f); + return 3; } - *s++ = 0xe0 | ((wchar & 0xf000) >> 12); - *s++ = 0x80 | ((wchar & 0xfc0) >> 6); - *s = 0x80 | (wchar & 0x3f); - return 3; - } - else if (wchar >= 0x10000 && wchar <= 0x10ffff) - { - *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18); - *s++ = 0x80 | ((wchar & 0x3f000) >> 12); - *s++ = 0x80 | ((wchar & 0xfc0) >> 6); - *s = 0x80 | (wchar & 0x3f); - return 4; - } + /* First half of a surrogate pair. Store the state and return + the first byte of the UTF-8 char. */ + tmp = ((wchar & 0x3ff) << 10) + 0x10000; + state->__value.__wchb[0] = (tmp >> 16) & 0xff; + state->__value.__wchb[1] = (tmp >> 8) & 0xff; + state->__count = -4; + *s = (0xf0 | ((tmp & 0x1c0000) >> 18)); + return 1; + } + *s++ = 0xe0 | ((wchar & 0xf000) >> 12); + *s++ = 0x80 | ((wchar & 0xfc0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 3; + } + if (wchar >= 0x10000 && wchar <= 0x10ffff) + { + *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18); + *s++ = 0x80 | ((wchar & 0x3f000) >> 12); + *s++ = 0x80 | ((wchar & 0xfc0) >> 6); + *s = 0x80 | (wchar & 0x3f); + return 4; + } + + r->_errno = EILSEQ; + return -1; +} + +/* Cygwin defines its own doublebyte charset conversion functions + because the underlying OS requires wchar_t == UTF-16. 
*/ +#ifndef __CYGWIN__ +int +_DEFUN (__sjis_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + + unsigned char char2 = (unsigned char)wchar; + unsigned char char1 = (unsigned char)(wchar >> 8); + + if (s == NULL) + return 0; /* not state-dependent */ + + if (char1 != 0x00) + { + /* first byte is non-zero..validate multi-byte char */ + if (_issjis1(char1) && _issjis2(char2)) + { + *s++ = (char)char1; + *s = (char)char2; + return 2; + } else { r->_errno = EILSEQ; return -1; } } - else if (!strcmp (__locale_charset (), "SJIS")) + *s = (char) wchar; + return 1; +} + +int +_DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + unsigned char char2 = (unsigned char)wchar; + unsigned char char1 = (unsigned char)(wchar >> 8); + + if (s == NULL) + return 0; /* not state-dependent */ + + if (char1 != 0x00) { - unsigned char char2 = (unsigned char)wchar; - unsigned char char1 = (unsigned char)(wchar >> 8); - - if (s == NULL) - return 0; /* not state-dependent */ - - if (char1 != 0x00) - { - /* first byte is non-zero..validate multi-byte char */ - if (_issjis1(char1) && _issjis2(char2)) - { - *s++ = (char)char1; - *s = (char)char2; - return 2; - } - else - { - r->_errno = EILSEQ; - return -1; - } - } + /* first byte is non-zero..validate multi-byte char */ + if (_iseucjp (char1) && _iseucjp (char2)) + { + *s++ = (char)char1; + *s = (char)char2; + return 2; + } + else + { + r->_errno = EILSEQ; + return -1; + } } - else if (!strcmp (__locale_charset (), "EUCJP")) + *s = (char) wchar; + return 1; +} + +int +_DEFUN (__jis_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + int cnt = 0; + unsigned 
char char2 = (unsigned char)wchar; + unsigned char char1 = (unsigned char)(wchar >> 8); + + if (s == NULL) + return 1; /* state-dependent */ + + if (char1 != 0x00) { - unsigned char char2 = (unsigned char)wchar; - unsigned char char1 = (unsigned char)(wchar >> 8); - - if (s == NULL) - return 0; /* not state-dependent */ - - if (char1 != 0x00) - { - /* first byte is non-zero..validate multi-byte char */ - if (_iseucjp (char1) && _iseucjp (char2)) - { - *s++ = (char)char1; - *s = (char)char2; - return 2; - } - else + /* first byte is non-zero..validate multi-byte char */ + if (_isjis (char1) && _isjis (char2)) + { + if (state->__state == 0) { - r->_errno = EILSEQ; - return -1; + /* must switch from ASCII to JIS state */ + state->__state = 1; + *s++ = ESC_CHAR; + *s++ = '$'; + *s++ = 'B'; + cnt = 3; } - } + *s++ = (char)char1; + *s = (char)char2; + return cnt + 2; + } + r->_errno = EILSEQ; + return -1; } - else if (!strcmp (__locale_charset (), "JIS")) + if (state->__state != 0) { - int cnt = 0; - unsigned char char2 = (unsigned char)wchar; - unsigned char char1 = (unsigned char)(wchar >> 8); - - if (s == NULL) - return 1; /* state-dependent */ - - if (char1 != 0x00) - { - /* first byte is non-zero..validate multi-byte char */ - if (_isjis (char1) && _isjis (char2)) - { - if (state->__state == 0) - { - /* must switch from ASCII to JIS state */ - state->__state = 1; - *s++ = ESC_CHAR; - *s++ = '$'; - *s++ = 'B'; - cnt = 3; - } - *s++ = (char)char1; - *s = (char)char2; - return cnt + 2; - } - else - { - r->_errno = EILSEQ; - return -1; - } - } - else - { - if (state->__state != 0) - { - /* must switch from JIS to ASCII state */ - state->__state = 0; - *s++ = ESC_CHAR; - *s++ = '('; - *s++ = 'B'; - cnt = 3; - } - *s = (char)char2; - return cnt + 1; - } + /* must switch from JIS to ASCII state */ + state->__state = 0; + *s++ = ESC_CHAR; + *s++ = '('; + *s++ = 'B'; + cnt = 3; } + *s = (char)char2; + return cnt + 1; +} +#endif /* !__CYGWIN__ */ + +#ifdef 
_MB_EXTENDED_CHARSETS_ISO +int +_DEFUN (__iso_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; if (s == NULL) return 0; + + /* wchars <= 0x9f translate to all ISO charsets directly. */ + if (wchar >= 0xa0) + { + int iso_idx = __iso_8859_index (charset + 9); + if (iso_idx >= 0) + { + unsigned char mb; + + if (s == NULL) + return 0; + + for (mb = 0; mb < 0x60; ++mb) + if (__iso_8859_conv[iso_idx][mb] == wchar) + { + *s = (char) (mb + 0xa0); + return 1; + } + r->_errno = EILSEQ; + return -1; + } + } - /* otherwise we are dealing with a single byte character */ if ((size_t)wchar >= 0x100) { r->_errno = EILSEQ; @@ -216,4 +307,51 @@ _DEFUN (_wctomb_r, (r, s, wchar, state), *s = (char) wchar; return 1; } - +#endif /* _MB_EXTENDED_CHARSETS_ISO */ + +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS +int +_DEFUN (__cp_wctomb, (r, s, wchar, charset, state), + struct _reent *r _AND + char *s _AND + wchar_t _wchar _AND + const char *charset _AND + mbstate_t *state) +{ + wint_t wchar = _wchar; + + if (s == NULL) + return 0; + + if (wchar >= 0x80) + { + int cp_idx = __cp_index (charset + 2); + if (cp_idx >= 0) + { + unsigned char mb; + + if (s == NULL) + return 0; + + for (mb = 0; mb < 0x80; ++mb) + if (__cp_conv[cp_idx][mb] == wchar) + { + *s = (char) (mb + 0x80); + return 1; + } + r->_errno = EILSEQ; + return -1; + } + } + + if ((size_t)wchar >= 0x100) + { + r->_errno = EILSEQ; + return -1; + } + + *s = (char) wchar; + return 1; +} +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ +#endif /* _MB_CAPABLE */ |