diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2023-08-02 16:55:52 +0200 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2023-08-02 16:56:24 +0200 |
commit | c49bc478b4a7e7d00a0c8540b8d6b6e99453443e (patch) | |
tree | 94599bcbebcec9ab2570a9bfdcbdfda85aa3dc1a /winsup/cygwin | |
parent | 290b56a87947620f171f65190616116fc1e0082c (diff) | |
download | newlib-c49bc478b4a7e7d00a0c8540b8d6b6e99453443e.zip newlib-c49bc478b4a7e7d00a0c8540b8d6b6e99453443e.tar.gz newlib-c49bc478b4a7e7d00a0c8540b8d6b6e99453443e.tar.bz2 |
Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diffstat (limited to 'winsup/cygwin')
-rw-r--r-- | winsup/cygwin/include/cygwin/version.h | 3 | ||||
-rw-r--r-- | winsup/cygwin/include/uchar.h | 14 | ||||
-rw-r--r-- | winsup/cygwin/release/3.5.0 | 2 | ||||
-rw-r--r-- | winsup/cygwin/strfuncs.cc | 166 |
4 files changed, 181 insertions, 4 deletions
diff --git a/winsup/cygwin/include/cygwin/version.h b/winsup/cygwin/include/cygwin/version.h index 7bc3e5e..833de64 100644 --- a/winsup/cygwin/include/cygwin/version.h +++ b/winsup/cygwin/include/cygwin/version.h @@ -482,12 +482,13 @@ details. */ 346: (Belatedly) add posix_spawn_file_actions_addchdir_np, posix_spawn_file_actions_addfchdir_np. 347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32. + 348: Add c8rtomb, mbrtoc8. Note that we forgot to bump the api for ualarm, strtoll, strtoull, sigaltstack, sethostname. */ #define CYGWIN_VERSION_API_MAJOR 0 -#define CYGWIN_VERSION_API_MINOR 346 +#define CYGWIN_VERSION_API_MINOR 348 /* There is also a compatibity version number associated with the shared memory regions. It is incremented when incompatible changes are made to the shared diff --git a/winsup/cygwin/include/uchar.h b/winsup/cygwin/include/uchar.h index bf865ff..ed548ac 100644 --- a/winsup/cygwin/include/uchar.h +++ b/winsup/cygwin/include/uchar.h @@ -4,8 +4,11 @@ #include <sys/cdefs.h> #include <wchar.h> -typedef __uint16_t char16_t; -typedef __uint32_t char32_t; +/* Either C2x or if C++ doesn't already define char8_t */ +#if __ISO_C_VISIBLE >= 2020 && !defined (__cpp_char8_t) +typedef unsigned char char8_t; +#endif + /* C++11 already defines those types. 
*/ #if !defined (__cplusplus) || (__cplusplus - 0 < 201103L) typedef __uint_least16_t char16_t; @@ -14,6 +17,13 @@ typedef __uint_least32_t char32_t; __BEGIN_DECLS +/* Either C2x or if C++ defines char8_t */ +#if __ISO_C_VISIBLE >= 2020 || defined (__cpp_char8_t) +size_t c8rtomb(char * __restrict, char8_t, mbstate_t * __restrict); +size_t mbrtoc8(char8_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict); +#endif + size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict); size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); diff --git a/winsup/cygwin/release/3.5.0 b/winsup/cygwin/release/3.5.0 index 7c27e1b..d71de50 100644 --- a/winsup/cygwin/release/3.5.0 +++ b/winsup/cygwin/release/3.5.0 @@ -27,7 +27,7 @@ What's new: - New API calls: posix_spawn_file_actions_addchdir_np, posix_spawn_file_actions_addfchdir_np. -- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32. +- New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32. What changed: ------------- diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 770d40e..cbcd0ec 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -156,6 +156,103 @@ c16rtomb (char *s, char16_t wc, mbstate_t *ps) } extern "C" size_t +c8rtomb (char *s, char8_t c8, mbstate_t *ps) +{ + struct _reent *reent = _REENT; + char32_t wc; + + if (ps == NULL) + { + _REENT_CHECK_MISC(reent); + ps = &(_REENT_MBRTOWC_STATE(reent)); + } + + if (s == NULL) + { + ps->__count = 0; + return 1; + } + if ((ps->__count & 0xff00) != 0xc800) + { + switch (c8) + { + case 0 ... 0x7f: /* single octet */ + ps->__count = 0; + wc = c8; + break; + case 0xc2 ... 0xf4: /* valid lead byte */ + ps->__count = 0xc801; + ps->__value.__wchb[0] = c8; + return 0; + default: + goto ilseq; + } + } + else + { + /* We already collected something... 
*/ + int idx = ps->__count & 0x3; + char8_t &c1 = ps->__value.__wchb[0]; + char8_t &c2 = ps->__value.__wchb[1]; + char8_t &c3 = ps->__value.__wchb[2]; + + switch (idx) + { + case 1: + /* Annoyingly complex check for validity for 2nd octet. */ + if (c8 <= 0x7f || c8 >= 0xc0) + goto ilseq; + if (c1 == 0xe0 && c8 <= 0x9f) + goto ilseq; + if (c1 == 0xed && c8 >= 0xa0) + goto ilseq; + if (c1 == 0xf0 && c8 <= 0x8f) + goto ilseq; + if (c1 == 0xf4 && c8 >= 0x90) + goto ilseq; + if (c1 >= 0xe0) + { + ps->__count = 0xc802; + c2 = c8; + return 0; + } + wc = ((c1 & 0x1f) << 6) + | (c8 & 0x3f); + break; + case 2: + if (c8 <= 0x7f || c8 >= 0xc0) + goto ilseq; + if (c1 >= 0xf0) + { + ps->__count = 0xc803; + c3 = c8; + return 0; + } + wc = ((c1 & 0x0f) << 12) + | ((c2 & 0x3f) << 6) + | (c8 & 0x3f); + break; + case 3: + if (c8 <= 0x7f || c8 >= 0xc0) + goto ilseq; + wc = ((c1 & 0x07) << 18) + | ((c2 & 0x3f) << 12) + | ((c3 & 0x3f) << 6) + | (c8 & 0x3f); + break; + default: /* Shouldn't happen */ + goto ilseq; + } + } + ps->__count = 0; + return c32rtomb (s, wc, ps); +ilseq: + ps->__count = 0; + _REENT_ERRNO(reent) = EILSEQ; + return (size_t)(-1); +} + +extern "C" size_t mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps) { size_t len, len2; @@ -246,6 +343,75 @@ ilseq: } extern "C" size_t +mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps) +{ + struct _reent *reent = _REENT; + size_t len; + char32_t wc; + + if (ps == NULL) + { + _REENT_CHECK_MISC(reent); + ps = &(_REENT_MBRTOWC_STATE(reent)); + } + + if (s == NULL) + { + if (ps) + ps->__count = 0; + return 1; + } + else if ((ps->__count & 0xff00) == 0xc800) + { + /* Return next utf-8 octet in line. */ + int idx = ps->__count & 0x3; + + if (pc8) + *pc8 = ps->__value.__wchb[--idx]; + if (idx == 0) + ps->__count = 0; + return -3; + } + len = mbrtoc32 (&wc, s, n, ps); + if (len > 0) + { + /* octets stored back to front for easier indexing */ + switch (wc) + { + case 0 ... 
0x7f: + ps->__value.__wchb[0] = wc; + ps->__count = 0; + break; + case 0x80 ... 0x7ff: + ps->__value.__wchb[1] = 0xc0 | ((wc & 0x7c0) >> 6); + ps->__value.__wchb[0] = 0x80 | (wc & 0x3f); + ps->__count = 0xc800 | 1; + break; + case 0x800 ... 0xffff: + ps->__value.__wchb[2] = 0xe0 | ((wc & 0xf000) >> 12); + ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6); + ps->__value.__wchb[0] = 0x80 | (wc & 0x3f); + ps->__count = 0xc800 | 2; + break; + case 0x10000 ... 0x10ffff: + ps->__value.__wchb[3] = 0xf0 | ((wc & 0x1c0000) >> 18); + ps->__value.__wchb[2] = 0x80 | ((wc & 0x3f000) >> 12); + ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6); + ps->__value.__wchb[0] = 0x80 | (wc & 0x3f); + ps->__count = 0xc800 | 3; + break; + default: + ps->__count = 0; + _REENT_ERRNO(reent) = EILSEQ; + return (size_t)(-1); + } + if (pc8) + *pc8 = ps->__value.__wchb[ps->__count & 0x3]; + } + return len; +} + +extern "C" size_t mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps) { wint_t *ptr = dst; |