aboutsummaryrefslogtreecommitdiff
path: root/newlib/libc/stdlib/mbtowc_r.c
diff options
context:
space:
mode:
Diffstat (limited to 'newlib/libc/stdlib/mbtowc_r.c')
-rw-r--r-- newlib/libc/stdlib/mbtowc_r.c 25
1 files changed, 16 insertions, 9 deletions
diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c
index cab8333..6c3bd3d 100644
--- a/newlib/libc/stdlib/mbtowc_r.c
+++ b/newlib/libc/stdlib/mbtowc_r.c
@@ -677,6 +677,21 @@ __utf8_mbtowc (struct _reent *r,
state->__count = 3;
else if (n < (size_t)-1)
++n;
+ if (n < 4)
+ return -2;
+ ch = t[i++];
+ if (ch < 0x80 || ch > 0xbf)
+ {
+ _REENT_ERRNO(r) = EILSEQ;
+ return -1;
+ }
+ /* Note: Originally we created the high surrogate on systems with
+ wchar_t == UTF-16 *before* checking the 4th byte. This was utterly
+ wrong, because this failed to check the last byte for being a valid
+ value for a complete UTF-8 4 byte sequence. As a result, calling
+ functions happily digested the high surrogate and then got an entirely
+ different character and handled this separately, thus generating
+ invalid UTF-16 values. */
if (state->__count == 3 && sizeof(wchar_t) == 2)
{
/* On systems which have wchar_t being UTF-16 values, the value
@@ -695,15 +710,7 @@ __utf8_mbtowc (struct _reent *r,
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6);
state->__count = 4;
*pwc = 0xd800 | ((tmp - 0x10000) >> 10);
- return i;
- }
- if (n < 4)
- return -2;
- ch = t[i++];
- if (ch < 0x80 || ch > 0xbf)
- {
- _REENT_ERRNO(r) = EILSEQ;
- return -1;
+ return 3;
}
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)