aboutsummaryrefslogtreecommitdiff
path: root/newlib/libc/stdio/vfscanf.c
diff options
context:
space:
mode:
author: Corinna Vinschen <corinna@vinschen.de> 2017-12-01 17:18:26 +0100
committer: Corinna Vinschen <corinna@vinschen.de> 2017-12-01 17:18:26 +0100
commit: a49209d2bc84644cba75a68b1104d89a810aecb1 (patch)
tree: 5911aa4eca620f69ef16e991a46b8af2fd63a79d /newlib/libc/stdio/vfscanf.c
parent: 9638c0752798c1c9aaf2e56ebaab240eae5eb8dc (diff)
download: newlib-a49209d2bc84644cba75a68b1104d89a810aecb1.zip
download: newlib-a49209d2bc84644cba75a68b1104d89a810aecb1.tar.gz
download: newlib-a49209d2bc84644cba75a68b1104d89a810aecb1.tar.bz2
newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t

* vfscanf: per POSIX, if the target type is wchar_t, the width is counted in (multibyte) characters, not in bytes.
* vfscanf: Handle UTF-8 multibyte sequences converted to surrogate pairs on UTF-16 systems.
* vfwscanf: Don't count high surrogates in input against field width counting. Per POSIX, input is [remainder of sentence truncated in source]

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diffstat (limited to 'newlib/libc/stdio/vfscanf.c')
-rw-r--r-- newlib/libc/stdio/vfscanf.c 28
1 file changed, 22 insertions, 6 deletions
diff --git a/newlib/libc/stdio/vfscanf.c b/newlib/libc/stdio/vfscanf.c
index e8e4dab..f90079d 100644
--- a/newlib/libc/stdio/vfscanf.c
+++ b/newlib/libc/stdio/vfscanf.c
@@ -488,10 +488,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
_p = _p0; \
_w; \
})
+ /* For systems with sizeof (wchar_t) == 2 (UTF-16), check if there's room
+ for at least 2 wchar_t's (a surrogate pair). */
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
({ \
size_t _nw = (_w); \
- if (_p_p && _p - _p0 == _nw) \
+ ptrdiff_t _dif = _p - _p0; \
+ if (_p_p && \
+ ((sizeof (_type) == 2 && _dif >= _nw - 1) \
+ || _dif >= _nw)) \
{ \
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
if (!_p0) \
@@ -499,7 +504,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
nassigned = EOF; \
goto match_failure; \
} \
- _p = _p0 + _nw; \
+ _p = _p0 + _dif; \
*_p_p = _p0; \
_nw <<= 1; \
} \
@@ -948,7 +953,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
size_t wcp_siz = 0;
#endif
mbstate_t state;
- memset (&state, 0, sizeof (mbstate_t));
if (flags & SUPPRESS)
wcp = NULL;
#ifdef _WANT_IO_POSIX_EXTENSIONS
@@ -958,13 +962,17 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
else
wcp = GET_ARG (N, ap, wchar_t *);
n = 0;
- while (width-- != 0)
+ while (width != 0)
{
if (n == MB_CUR_MAX)
goto input_failure;
buf[n++] = *fp->_p;
fp->_r -= 1;
fp->_p += 1;
+ /* Got a high surrogate, allow low surrogate to slip
+ through */
+ if (mbslen != 3 || state.__count != 4)
+ memset (&state, 0, sizeof (mbstate_t));
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
== (size_t)-1)
goto input_failure; /* Invalid sequence */
@@ -973,6 +981,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
if (mbslen != (size_t)-2) /* Incomplete sequence */
{
nread += n;
+ /* Handle high surrogate */
+ if (mbslen != 3 || state.__count != 4)
+ width -= 1;
if (!(flags & SUPPRESS))
{
#ifdef _WANT_IO_POSIX_EXTENSIONS
@@ -1122,7 +1133,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
#endif
/* Process %S and %ls placeholders */
mbstate_t state;
- memset (&state, 0, sizeof (mbstate_t));
if (flags & SUPPRESS)
wcp = &wc;
#ifdef _WANT_IO_POSIX_EXTENSIONS
@@ -1139,7 +1149,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
buf[n++] = *fp->_p;
fp->_r -= 1;
fp->_p += 1;
- width--;
+ /* Got a high surrogate, allow low surrogate to slip
+ through */
+ if (mbslen != 3 || state.__count != 4)
+ memset (&state, 0, sizeof (mbstate_t));
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
== (size_t)-1)
goto input_failure;
@@ -1154,6 +1167,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
break;
}
nread += n;
+ /* Handle high surrogate */
+ if (mbslen != 3 || state.__count != 4)
+ width -= 1;
if ((flags & SUPPRESS) == 0)
{
wcp += 1;