diff options
Diffstat (limited to 'newlib/libc/stdlib')
-rw-r--r-- | newlib/libc/stdlib/mbtowc_r.c | 25 | ||||
-rw-r--r-- | newlib/libc/stdlib/wcstombs_r.c | 7 | ||||
-rw-r--r-- | newlib/libc/stdlib/wctomb_r.c | 4 |
3 files changed, 22 insertions, 14 deletions
diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c index cab8333..6c3bd3d 100644 --- a/newlib/libc/stdlib/mbtowc_r.c +++ b/newlib/libc/stdlib/mbtowc_r.c @@ -677,6 +677,21 @@ __utf8_mbtowc (struct _reent *r, state->__count = 3; else if (n < (size_t)-1) ++n; + if (n < 4) + return -2; + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) + { + _REENT_ERRNO(r) = EILSEQ; + return -1; + } + /* Note: Originally we created the low surrogate pair on systems with + wchar_t == UTF-16 *before* checking the 4th byte. This was utterly + wrong, because this failed to check the last byte for being a valid + value for a complete UTF-8 4 byte sequence. As a result, calling + functions happily digested the low surrogate and then got an entirely + different character and handled this separately, thus generating + invalid UTF-16 values. */ if (state->__count == 3 && sizeof(wchar_t) == 2) { /* On systems which have wchar_t being UTF-16 values, the value @@ -695,15 +710,7 @@ __utf8_mbtowc (struct _reent *r, | (wint_t)((state->__value.__wchb[2] & 0x3f) << 6); state->__count = 4; *pwc = 0xd800 | ((tmp - 0x10000) >> 10); - return i; - } - if (n < 4) - return -2; - ch = t[i++]; - if (ch < 0x80 || ch > 0xbf) - { - _REENT_ERRNO(r) = EILSEQ; - return -1; + return 3; } tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18) | (wint_t)((state->__value.__wchb[1] & 0x3f) << 12) diff --git a/newlib/libc/stdlib/wcstombs_r.c b/newlib/libc/stdlib/wcstombs_r.c index c6a06a3..2c82a2c 100644 --- a/newlib/libc/stdlib/wcstombs_r.c +++ b/newlib/libc/stdlib/wcstombs_r.c @@ -17,14 +17,15 @@ _wcstombs_r (struct _reent *r, if (s == NULL) { size_t num_bytes = 0; - while (*pwcs != 0) + do { - bytes = __WCTOMB (r, buff, *pwcs++, state); + bytes = __WCTOMB (r, buff, *pwcs, state); if (bytes == -1) return -1; num_bytes += bytes; } - return num_bytes; + while (*pwcs++ != 0x00); + return num_bytes - 1; } else { diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c index 5ea1e13..ec6adfa 100644 --- a/newlib/libc/stdlib/wctomb_r.c +++ b/newlib/libc/stdlib/wctomb_r.c @@ -62,8 +62,8 @@ __utf8_wctomb (struct _reent *r, of the surrogate and proceed to convert the given character. Note to return extra 3 bytes. */ wchar_t tmp; - tmp = (state->__value.__wchb[0] << 16 | state->__value.__wchb[1] << 8) - - (0x10000 >> 10 | 0xd80d); + tmp = (((state->__value.__wchb[0] << 16 | state->__value.__wchb[1] << 8) + - 0x10000) >> 10) | 0xd800; *s++ = 0xe0 | ((tmp & 0xf000) >> 12); *s++ = 0x80 | ((tmp & 0xfc0) >> 6); *s++ = 0x80 | (tmp & 0x3f); |