diff options
Diffstat (limited to 'newlib/libc/stdlib/mbtowc_r.c')
-rw-r--r-- | newlib/libc/stdlib/mbtowc_r.c | 25 |
1 files changed, 16 insertions, 9 deletions
diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c index cab8333..6c3bd3d 100644 --- a/newlib/libc/stdlib/mbtowc_r.c +++ b/newlib/libc/stdlib/mbtowc_r.c @@ -677,6 +677,21 @@ __utf8_mbtowc (struct _reent *r, state->__count = 3; else if (n < (size_t)-1) ++n; + if (n < 4) + return -2; + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) + { + _REENT_ERRNO(r) = EILSEQ; + return -1; + } + /* Note: Originally we created the low surrogate pair on systems with + wchar_t == UTF-16 *before* checking the 4th byte. This was utterly + wrong, because this failed to check the last byte for being a valid + value for a complete UTF-8 4 byte sequence. As a result, calling + functions happily digested the low surrogate and then got an entirely + different character and handled this separately, thus generating + invalid UTF-16 values. */ if (state->__count == 3 && sizeof(wchar_t) == 2) { /* On systems which have wchar_t being UTF-16 values, the value @@ -695,15 +710,7 @@ __utf8_mbtowc (struct _reent *r, | (wint_t)((state->__value.__wchb[2] & 0x3f) << 6); state->__count = 4; *pwc = 0xd800 | ((tmp - 0x10000) >> 10); - return i; - } - if (n < 4) - return -2; - ch = t[i++]; - if (ch < 0x80 || ch > 0xbf) - { - _REENT_ERRNO(r) = EILSEQ; - return -1; + return 3; } tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18) | (wint_t)((state->__value.__wchb[1] & 0x3f) << 12) |