3 files changed, 22 insertions, 14 deletions
diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c
index cab8333..6c3bd3d 100644
--- a/newlib/libc/stdlib/mbtowc_r.c
+++ b/newlib/libc/stdlib/mbtowc_r.c
@@ -677,6 +677,21 @@ __utf8_mbtowc (struct _reent *r,
 	state->__count = 3;
       else if (n < (size_t)-1)
 	++n;
+      if (n < 4)
+	return -2;
+      ch = t[i++];
+      if (ch < 0x80 || ch > 0xbf)
+	{
+	  _REENT_ERRNO(r) = EILSEQ;
+	  return -1;
+	}
+      /* Note: Originally we created the high (leading) surrogate on systems
+	 with wchar_t == UTF-16 *before* checking the 4th byte.  This was
+	 wrong, because it failed to check that the last byte is a valid
+	 value for a complete 4 byte UTF-8 sequence.  As a result, calling
+	 functions happily consumed the high surrogate, then got an entirely
+	 different character and handled it separately, thus generating
+	 invalid UTF-16 values. */
       if (state->__count == 3 && sizeof(wchar_t) == 2)
 	{
 	  /* On systems which have wchar_t being UTF-16 values, the value
@@ -695,15 +710,7 @@ __utf8_mbtowc (struct _reent *r,
 	    |   (wint_t)((state->__value.__wchb[2] & 0x3f) << 6);
 	  state->__count = 4;
 	  *pwc = 0xd800 | ((tmp - 0x10000) >> 10);
-	  return i;
-	}
-      if (n < 4)
-	return -2;
-      ch = t[i++];
-      if (ch < 0x80 || ch > 0xbf)
-	{
-	  _REENT_ERRNO(r) = EILSEQ;
-	  return -1;
+	  return 3;
 	}
       tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
 	|   (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
diff --git a/newlib/libc/stdlib/wcstombs_r.c b/newlib/libc/stdlib/wcstombs_r.c
index c6a06a3..2c82a2c 100644
--- a/newlib/libc/stdlib/wcstombs_r.c
+++ b/newlib/libc/stdlib/wcstombs_r.c
@@ -17,14 +17,15 @@ _wcstombs_r (struct _reent *r,
   if (s == NULL)
     {
       size_t num_bytes = 0;
-      while (*pwcs != 0)
+      do
 	{
-	  bytes = __WCTOMB (r, buff, *pwcs++, state);
+	  bytes = __WCTOMB (r, buff, *pwcs, state);
 	  if (bytes == -1)
 	    return -1;
 	  num_bytes += bytes;
 	}
-      return num_bytes;
+      while (*pwcs++ != 0x00);
+      return num_bytes - 1;
     }
   else
     {
diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c
index 5ea1e13..ec6adfa 100644
--- a/newlib/libc/stdlib/wctomb_r.c
+++ b/newlib/libc/stdlib/wctomb_r.c
@@ -62,8 +62,8 @@ __utf8_wctomb (struct _reent *r,
 	 of the surrogate and proceed to convert the given character.  Note
 	 to return extra 3 bytes. */
       wchar_t tmp;
-      tmp = (state->__value.__wchb[0] << 16 | state->__value.__wchb[1] << 8)
-	    - (0x10000 >> 10 | 0xd80d);
+      tmp = (((state->__value.__wchb[0] << 16 | state->__value.__wchb[1] << 8)
+	    - 0x10000) >> 10) | 0xd800;
       *s++ = 0xe0 | ((tmp & 0xf000) >> 12);
       *s++ = 0x80 | ((tmp &  0xfc0) >> 6);
       *s++ = 0x80 |  (tmp &   0x3f);