about summary refs log tree commit diff
path: root/newlib/libc/stdlib
diff options
context:
space:
mode:
author Jeff Johnston <jjohnstn@redhat.com> 2002-09-09 21:42:14 +0000
committer Jeff Johnston <jjohnstn@redhat.com> 2002-09-09 21:42:14 +0000
commit 9c64d2a7ba6feb196099ee8b65bba163191008c0 (patch)
tree c68382219855cc0e74227118398befe77b5934a1 /newlib/libc/stdlib
parent b0591c89af3471f90b1762a712d7eb5a857cc568 (diff)
download newlib-9c64d2a7ba6feb196099ee8b65bba163191008c0.zip
newlib-9c64d2a7ba6feb196099ee8b65bba163191008c0.tar.gz
newlib-9c64d2a7ba6feb196099ee8b65bba163191008c0.tar.bz2
2002-09-09 Jeff Johnston <jjohnstn@redhat.com>
* libc/include/sys/_types.h (_mbstate_t): Changed to use unsigned char internally.
* libc/sys/linux/sys/_types.h: Ditto.
* libc/include/sys/reent.h
* libc/stdlib/mblen.c (mblen): Use function-specific state value from default reentrancy structure.
* libc/stdlib/mblen_r.c (_mblen_r): If return code from _mbtowc_r is less than 0, reset state __count value and return -1.
* libc/stdlib/mbrlen.c (mbrlen): If the input state pointer is NULL, use the function-specific pointer provided in the default reentrancy structure.
* libc/stdlib/mbrtowc.c: Add reentrant form of function. If input state pointer is NULL, use function-specific area provided in reentrancy structure.
* libc/stdlib/mbsrtowcs.c: Ditto.
* libc/stdlib/wcrtomb.c: Ditto.
* libc/stdlib/wcsrtombs.c: Ditto.
* libc/stdlib/mbstowcs.c: Reformat.
* libc/stdlib/wcstombs.c: Ditto.
* libc/stdlib/mbstowcs_r.c (_mbstowcs_r): If an error occurs, reset the state's __count value and return -1.
* libc/stdlib/mbtowc.c: Ditto.
* libc/stdlib/mbtowc_r.c (_mbtowc_r): Add restartable functionality. If number of bytes is used up before completing a valid multibyte character, return -2 and save the state.
* libc/stdlib/wctomb_r.c (_wctomb_r): Define __state as __count and change some __count references to __state for clarity.
Diffstat (limited to 'newlib/libc/stdlib')
-rw-r--r-- newlib/libc/stdlib/mblen.c 33
-rw-r--r-- newlib/libc/stdlib/mblen_r.c 20
-rw-r--r-- newlib/libc/stdlib/mbrlen.c 10
-rw-r--r-- newlib/libc/stdlib/mbrtowc.c 38
-rw-r--r-- newlib/libc/stdlib/mbsrtowcs.c 64
-rw-r--r-- newlib/libc/stdlib/mbstowcs.c 32
-rw-r--r-- newlib/libc/stdlib/mbstowcs_r.c 5
-rw-r--r-- newlib/libc/stdlib/mbtowc.c 38
-rw-r--r-- newlib/libc/stdlib/mbtowc_r.c 548
-rw-r--r-- newlib/libc/stdlib/wcrtomb.c 34
-rw-r--r-- newlib/libc/stdlib/wcsrtombs.c 74
-rw-r--r-- newlib/libc/stdlib/wcstombs.c 30
-rw-r--r-- newlib/libc/stdlib/wctomb_r.c 11
13 files changed, 592 insertions, 345 deletions
diff --git a/newlib/libc/stdlib/mblen.c b/newlib/libc/stdlib/mblen.c
index a0ed29a..b131095 100644
--- a/newlib/libc/stdlib/mblen.c
+++ b/newlib/libc/stdlib/mblen.c
@@ -52,21 +52,26 @@ _DEFUN (mblen, (s, n),
size_t n)
{
#ifdef MB_CAPABLE
- int retval = 0;
- _REENT_CHECK_MISC(_REENT);
-
- retval = _mbtowc_r (_REENT, NULL, s, n, &(_REENT_MBLEN_STATE(_REENT)));
- if (retval < 0)
- return -1;
- else
- return retval;
-
+ int retval = 0;
+ mbstate_t *state;
+
+ _REENT_CHECK_MISC(_REENT);
+ state = &(_REENT_MBLEN_STATE(_REENT));
+ retval = _mbtowc_r (_REENT, NULL, s, n, state);
+ if (retval < 0)
+ {
+ state->__count = 0;
+ return -1;
+ }
+ else
+ return retval;
+
#else /* not MB_CAPABLE */
- if (s == NULL || *s == '\0')
- return 0;
- if (n == 0)
- return -1;
- return 1;
+ if (s == NULL || *s == '\0')
+ return 0;
+ if (n == 0)
+ return -1;
+ return 1;
#endif /* not MB_CAPABLE */
}
diff --git a/newlib/libc/stdlib/mblen_r.c b/newlib/libc/stdlib/mblen_r.c
index cff7395..1d66597 100644
--- a/newlib/libc/stdlib/mblen_r.c
+++ b/newlib/libc/stdlib/mblen_r.c
@@ -54,14 +54,22 @@ _DEFUN (_mblen_r, (r, s, n, state),
mbstate_t *state)
{
#ifdef MB_CAPABLE
+ int retval;
+ retval = _mbtowc_r (r, NULL, s, n, state);
- return _mbtowc_r (r, NULL, s, n, state);
+ if (retval < 0)
+ {
+ state->__count = 0;
+ return -1;
+ }
+
+ return retval;
#else /* not MB_CAPABLE */
- if (s == NULL || *s == '\0')
- return 0;
- if (n == 0)
- return -1;
- return 1;
+ if (s == NULL || *s == '\0')
+ return 0;
+ if (n == 0)
+ return -1;
+ return 1;
#endif /* not MB_CAPABLE */
}
diff --git a/newlib/libc/stdlib/mbrlen.c b/newlib/libc/stdlib/mbrlen.c
index 8f0c648..504348d 100644
--- a/newlib/libc/stdlib/mbrlen.c
+++ b/newlib/libc/stdlib/mbrlen.c
@@ -7,7 +7,13 @@
size_t
mbrlen(const char *s, size_t n, mbstate_t *ps)
{
- mbstate_t internal;
+#ifdef MB_CAPABLE
+ if (ps == NULL)
+ {
+ _REENT_CHECK_MISC(_REENT);
+ ps = &(_REENT_MBRLEN_STATE(_REENT));
+ }
+#endif
- return mbrtowc(NULL, s, n, ps != NULL ? ps : &internal);
+ return mbrtowc(NULL, s, n, ps);
}
diff --git a/newlib/libc/stdlib/mbrtowc.c b/newlib/libc/stdlib/mbrtowc.c
index 7934ca1..0db1208 100644
--- a/newlib/libc/stdlib/mbrtowc.c
+++ b/newlib/libc/stdlib/mbrtowc.c
@@ -6,24 +6,46 @@
#include <string.h>
size_t
-mbrtowc(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
+_DEFUN (_mbrtowc_r, (ptr, pwc, s, n, ps),
+ struct _reent *ptr _AND
+ wchar_t *pwc _AND
+ const char *s _AND
+ size_t n _AND
+ mbstate_t *ps)
{
int retval = 0;
- _REENT_CHECK_MISC(_REENT);
+
+#ifdef MB_CAPABLE
+ if (ps == NULL)
+ {
+ _REENT_CHECK_MISC(ptr);
+ ps = &(_REENT_MBRTOWC_STATE(ptr));
+ }
+#endif
if (s == NULL)
- retval = _mbtowc_r (_REENT, pwc, "", 1, ps);
+ retval = _mbtowc_r (ptr, pwc, "", 1, ps);
else
- retval = _mbtowc_r (_REENT, pwc, s, n, ps);
-
- if (*pwc == NULL)
- memset (ps, '\0', sizeof (mbstate_t));
+ retval = _mbtowc_r (ptr, pwc, s, n, ps);
if (retval == -1)
{
- _REENT->_errno = EILSEQ;
+ ps->__count = 0;
+ ptr->_errno = EILSEQ;
return (size_t)(-1);
}
else
return (size_t)retval;
}
+
+#ifndef _REENT_ONLY
+size_t
+_DEFUN (mbrtowc, (pwc, s, n, ps),
+ wchar_t *pwc _AND
+ const char *s _AND
+ size_t n _AND
+ mbstate_t *ps)
+{
+ return _mbrtowc_r (_REENT, pwc, s, n, ps);
+}
+#endif /* !_REENT_ONLY */
diff --git a/newlib/libc/stdlib/mbsrtowcs.c b/newlib/libc/stdlib/mbsrtowcs.c
index 256ab0d..08d9d0c 100644
--- a/newlib/libc/stdlib/mbsrtowcs.c
+++ b/newlib/libc/stdlib/mbsrtowcs.c
@@ -5,20 +5,62 @@
#include <errno.h>
size_t
-mbsrtowcs(wchar_t *dst, const char **src, size_t len, mbstate_t *ps)
+_DEFUN (_mbsrtowcs_r, (r, dst, src, n, ps),
+ struct _reent *r _AND
+ wchar_t *dst _AND
+ const char **src _AND
+ size_t n _AND
+ mbstate_t *ps)
{
- int retval = 0;
- mbstate_t internal;
+ wchar_t *ptr = dst;
+ size_t max = n;
+ int bytes;
- _REENT_CHECK_MISC(_REENT);
-
- retval = _mbstowcs_r (_REENT, dst, *src, len, ps != NULL ? ps : &internal);
+#ifdef MB_CAPABLE
+ if (ps == NULL)
+ {
+ _REENT_CHECK_MISC(r);
+ ps = &(_REENT_MBSRTOWCS_STATE(r));
+ }
+#endif
- if (retval == -1)
+ while (n > 0)
{
- _REENT->_errno = EILSEQ;
- return (size_t)(-1);
+ bytes = _mbtowc_r (r, ptr, *src, MB_CUR_MAX, ps);
+ if (bytes > 0)
+ {
+ *src += bytes;
+ ++ptr;
+ --n;
+ }
+ else if (bytes == -2)
+ {
+ *src += MB_CUR_MAX;
+ }
+ else if (bytes == 0)
+ {
+ *src = NULL;
+ return (size_t)(ptr - dst);
+ }
+ else
+ {
+ ps->__count = 0;
+ r->_errno = EILSEQ;
+ return (size_t)-1;
+ }
}
- else
- return (size_t)retval;
+
+ return (size_t)max;
+}
+
+#ifndef _REENT_ONLY
+size_t
+_DEFUN (mbsrtowcs, (dst, src, len, ps),
+ wchar_t *dst _AND
+ const char **src _AND
+ size_t len _AND
+ mbstate_t *ps)
+{
+ return _mbsrtowcs_r (_REENT, dst, src, len, ps);
}
+#endif /* !_REENT_ONLY */
diff --git a/newlib/libc/stdlib/mbstowcs.c b/newlib/libc/stdlib/mbstowcs.c
index eb0ccd7..cb09f31 100644
--- a/newlib/libc/stdlib/mbstowcs.c
+++ b/newlib/libc/stdlib/mbstowcs.c
@@ -59,23 +59,23 @@ _DEFUN (mbstowcs, (pwcs, s, n),
size_t n)
{
#ifdef MB_CAPABLE
- mbstate_t state;
- state.__count = 0;
-
- return _mbstowcs_r (_REENT, pwcs, s, n, &state);
+ mbstate_t state;
+ state.__count = 0;
+
+ return _mbstowcs_r (_REENT, pwcs, s, n, &state);
#else /* not MB_CAPABLE */
-
- int count = 0;
-
- if (n != 0) {
- do {
- if ((*pwcs++ = (wchar_t) *s++) == 0)
- break;
- count++;
- } while (--n != 0);
- }
-
- return count;
+
+ int count = 0;
+
+ if (n != 0) {
+ do {
+ if ((*pwcs++ = (wchar_t) *s++) == 0)
+ break;
+ count++;
+ } while (--n != 0);
+ }
+
+ return count;
#endif /* not MB_CAPABLE */
}
diff --git a/newlib/libc/stdlib/mbstowcs_r.c b/newlib/libc/stdlib/mbstowcs_r.c
index 89cda03..c6130b2 100644
--- a/newlib/libc/stdlib/mbstowcs_r.c
+++ b/newlib/libc/stdlib/mbstowcs_r.c
@@ -18,7 +18,10 @@ _DEFUN (_mbstowcs_r, (reent, pwcs, s, n, state),
{
bytes = _mbtowc_r (r, ptr, t, MB_CUR_MAX, state);
if (bytes < 0)
- return -1;
+ {
+ state->__count = 0;
+ return -1;
+ }
else if (bytes == 0)
return ptr - pwcs;
t += bytes;
diff --git a/newlib/libc/stdlib/mbtowc.c b/newlib/libc/stdlib/mbtowc.c
index 6da735a..e1e725d 100644
--- a/newlib/libc/stdlib/mbtowc.c
+++ b/newlib/libc/stdlib/mbtowc.c
@@ -52,6 +52,7 @@ effects vary with the locale.
#ifndef _REENT_ONLY
#include <stdlib.h>
+#include <wchar.h>
int
_DEFUN (mbtowc, (pwc, s, n),
@@ -60,23 +61,28 @@ _DEFUN (mbtowc, (pwc, s, n),
size_t n)
{
#ifdef MB_CAPABLE
- int retval = 0;
- _REENT_CHECK_MISC(_REENT);
-
- retval = _mbtowc_r (_REENT, pwc, s, n, &(_REENT_MBTOWC_STATE(_REENT)));
-
- if (retval < 0)
- return -1;
- else
- return retval;
+ int retval = 0;
+ mbstate_t *ps;
+
+ _REENT_CHECK_MISC(_REENT);
+ ps = &(_REENT_MBTOWC_STATE(_REENT));
+
+ retval = _mbtowc_r (_REENT, pwc, s, n, ps);
+
+ if (retval < 0)
+ {
+ ps->__count = 0;
+ return -1;
+ }
+ return retval;
#else /* not MB_CAPABLE */
- if (s == NULL)
- return 0;
- if (n == 0)
- return -1;
- if (pwc)
- *pwc = (wchar_t) *s;
- return (*s != '\0');
+ if (s == NULL)
+ return 0;
+ if (n == 0)
+ return -1;
+ if (pwc)
+ *pwc = (wchar_t) *s;
+ return (*s != '\0');
#endif /* not MB_CAPABLE */
}
diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c
index b7909d3..26141b4 100644
--- a/newlib/libc/stdlib/mbtowc_r.c
+++ b/newlib/libc/stdlib/mbtowc_r.c
@@ -7,9 +7,9 @@
#ifdef MB_CAPABLE
typedef enum { ESCAPE, DOLLAR, BRACKET, AT, B, J,
NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
-typedef enum { ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
- J2_ESC, J2_ESC_BR, DONE, INV, JIS_S_NUM } JIS_STATE;
-typedef enum { COPY_A, COPY_J, COPY_J2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR } JIS_ACTION;
+typedef enum { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
+ INV, JIS_S_NUM } JIS_STATE;
+typedef enum { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
/**************************************************************************************
* state/action tables for processing JIS encoding
@@ -20,33 +20,30 @@ typedef enum { COPY_A, COPY_J, COPY_J2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR } JIS
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
-/* ASCII */ { A_ESC, DONE, DONE, DONE, DONE, DONE, DONE, DONE, DONE },
-/* A_ESC */ { DONE, A_ESC_DL, DONE, DONE, DONE, DONE, DONE, DONE, DONE },
-/* A_ESC_DL */{ DONE, DONE, DONE, JIS, JIS, DONE, DONE, DONE, DONE },
+/* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
/* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
-/* JIS_1 */ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2, INV },
-/* JIS_2 */ { J2_ESC, DONE, DONE, DONE, DONE, DONE, INV, DONE, DONE },
+/* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
+/* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
+/* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
/* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
/* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
-/* J2_ESC */ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
-/* J2_ESC_BR*/{ INV, INV, INV, INV, DONE, DONE, INV, INV, INV },
};
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
/* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
+/* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
/* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
-/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, MAKE_J, MAKE_J, COPY_A, COPY_A, COPY_A, COPY_A},
-/* JIS */ { NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
-/* JIS_1 */ { ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
-/* JIS_2 */ { NOOP, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, COPY_J2},
+/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
+/* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
/* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
-/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
-/* J2_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
-/* J2_ESC_BR*/{ ERROR, ERROR, ERROR, ERROR, COPY_J, COPY_J, ERROR, ERROR, ERROR },
+/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
};
#endif /* MB_CAPABLE */
+/* we override the mbstate_t __count field for more complex encodings and use it to store a state value */
+#define __state __count
+
int
_DEFUN (_mbtowc_r, (r, pwc, s, n, state),
struct _reent *r _AND
@@ -70,230 +67,305 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
{ /* fall-through */ }
else if (!strcmp (r->_current_locale, "C-UTF-8"))
{
- wchar_t char1 = 0;
+ int ch;
+ int i = 0;
if (s == NULL)
return 0; /* UTF-8 character encodings are not state-dependent */
- /* we know n >= 1 if we get here */
- *pwc = 0;
- char1 = (wchar_t)*t;
-
- if (char1 == '\0')
- return 0; /* s points to the null character */
-
- if (char1 >= 0x0 && char1 <= 0x7f)
- {
- /* single-byte sequence */
- *pwc = char1;
- return 1;
- }
- else if (char1 >= 0xc0 && char1 <= 0xdf)
- {
- /* two-byte sequence */
- if (n >= 2)
- {
- wchar_t char2 = (wchar_t)*(t+1);
-
- if (char2 < 0x80 || char2 > 0xbf)
- return -1;
-
- if (char1 < 0xc2)
- /* overlong UTF-8 sequence */
- return -1;
-
- *pwc = ((char1 & 0x1f) << 6)
- | (char2 & 0x3f);
- return 2;
- }
- else
- return -1;
- }
- else if (char1 >= 0xe0 && char1 <= 0xef)
- {
- /* three-byte sequence */
- if (n >= 3)
- {
- wchar_t char2 = (wchar_t)*(t+1);
- wchar_t char3 = (wchar_t)*(t+2);
-
- if (char2 < 0x80 || char2 > 0xbf)
- return -1;
- if (char3 < 0x80 || char3 > 0xbf)
- return -1;
-
- if (char1 == 0xe0)
- {
- if (char2 < 0xa0)
- /* overlong UTF-8 sequence */
- return -1;
- }
-
- *pwc = ((char1 & 0x0f) << 12)
- | ((char2 & 0x3f) << 6)
- | (char3 & 0x3f);
-
- if (*pwc >= 0xd800 && *pwc <= 0xdfff)
- {
- return -1;
- }
- else
- return 3;
- }
- else
- return -2;
- }
- else if (char1 >= 0xf0 && char1 <= 0xf7)
- {
- /* four-byte sequence */
- if (n >= 4)
- {
- wchar_t char2 = (wchar_t)*(t+1);
- wchar_t char3 = (wchar_t)*(t+2);
- wchar_t char4 = (wchar_t)*(t+3);
-
- if (char2 < 0x80 || char2 > 0xbf)
- return -1;
- if (char3 < 0x80 || char3 > 0xbf)
- return -1;
- if (char4 < 0x80 || char4 > 0xbf)
- return -1;
-
- if (char1 == 0xf0)
- {
- if (char2 < 0x90)
- /* overlong UTF-8 sequence */
- return -1;
- }
-
- *pwc = ((char1 & 0x07) << 18)
- | ((char2 & 0x3f) << 12)
- | ((char3 & 0x3f) << 6)
- | (char4 & 0x3f);
-
- return 4;
- }
- else
- return -2;
- }
- else if (char1 >= 0xf8 && char1 <= 0xfb)
- {
- /* five-byte sequence */
- if (n >= 5)
- {
- wchar_t char2 = (wchar_t)*(t+1);
- wchar_t char3 = (wchar_t)*(t+2);
- wchar_t char4 = (wchar_t)*(t+3);
- wchar_t char5 = (wchar_t)*(t+4);
-
- if (char2 < 0x80 || char2 > 0xbf)
- return -1;
- if (char3 < 0x80 || char3 > 0xbf)
- return -1;
- if (char4 < 0x80 || char4 > 0xbf)
- return -1;
- if (char5 < 0x80 || char5 > 0xbf)
- return -1;
-
- if (char1 == 0xf8)
- {
- if (char2 < 0x88)
- /* overlong UTF-8 sequence */
- return -1;
- }
-
- *pwc = ((char1 & 0x03) << 24)
- | ((char2 & 0x3f) << 18)
- | ((char3 & 0x3f) << 12)
- | ((char4 & 0x3f) << 6)
- | (char5 & 0x3f);
- return 5;
- }
- else
- return -2;
- }
- else if (char1 >= 0xfc && char1 <= 0xfd)
+ if (state->__count == 0)
+ ch = t[i++];
+ else
+ {
+ ++n;
+ ch = state->__value.__wchb[0];
+ }
+
+ if (ch == '\0')
+ {
+ *pwc = 0;
+ state->__count = 0;
+ return 0; /* s points to the null character */
+ }
+
+ if (ch >= 0x0 && ch <= 0x7f)
+ {
+ /* single-byte sequence */
+ state->__count = 0;
+ *pwc = ch;
+ return 1;
+ }
+ else if (ch >= 0xc0 && ch <= 0xdf)
+ {
+ /* two-byte sequence */
+ state->__value.__wchb[0] = ch;
+ state->__count = 1;
+ if (n < 2)
+ return -2;
+ ch = t[i++];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ if (state->__value.__wchb[0] < 0xc2)
+ /* overlong UTF-8 sequence */
+ return -1;
+ state->__count = 0;
+ *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
+ | (wchar_t)(ch & 0x3f);
+ return i;
+ }
+ else if (ch >= 0xe0 && ch <= 0xef)
+ {
+ /* three-byte sequence */
+ wchar_t tmp;
+ state->__value.__wchb[0] = ch;
+ if (state->__count == 0)
+ state->__count = 1;
+ else
+ ++n;
+ if (n < 2)
+ return -2;
+ ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
+ if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
+ /* overlong UTF-8 sequence */
+ return -1;
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[1] = ch;
+ state->__count = 2;
+ if (n < 3)
+ return -2;
+ ch = t[i++];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__count = 0;
+ tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
+ | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
+ | (wchar_t)(ch & 0x3f);
+
+ if (tmp >= 0xd800 && tmp <= 0xdfff)
+ return -1;
+ *pwc = tmp;
+ return i;
+ }
+ else if (ch >= 0xf0 && ch <= 0xf7)
+ {
+ /* four-byte sequence */
+ if (sizeof(wchar_t) < 4)
+ return -1; /* we can't store such a value */
+ state->__value.__wchb[0] = ch;
+ if (state->__count == 0)
+ state->__count = 1;
+ else
+ ++n;
+ if (n < 2)
+ return -2;
+ ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
+ if (state->__value.__wchb[0] == 0xf0 && ch < 0x90)
+ /* overlong UTF-8 sequence */
+ return -1;
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[1] = ch;
+ if (state->__count == 1)
+ state->__count = 2;
+ else
+ ++n;
+ if (n < 3)
+ return -2;
+ ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[2] = ch;
+ state->__count = 3;
+ if (n < 4)
+ return -2;
+ ch = t[i++];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ *pwc = (wchar_t)((state->__value.__wchb[0] & 0x07) << 18)
+ | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 12)
+ | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 6)
+ | (wchar_t)(ch & 0x3f);
+
+ state->__count = 0;
+ return i;
+ }
+ else if (ch >= 0xf8 && ch <= 0xfb)
+ {
+ /* five-byte sequence */
+ if (sizeof(wchar_t) < 4)
+ return -1; /* we can't store such a value */
+ state->__value.__wchb[0] = ch;
+ if (state->__count == 0)
+ state->__count = 1;
+ else
+ ++n;
+ if (n < 2)
+ return -2;
+ ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
+ if (state->__value.__wchb[0] == 0xf8 && ch < 0x88)
+ /* overlong UTF-8 sequence */
+ return -1;
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[1] = ch;
+ if (state->__count == 1)
+ state->__count = 2;
+ else
+ ++n;
+ if (n < 3)
+ return -2;
+ ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[2] = ch;
+ if (state->__count == 2)
+ state->__count = 3;
+ else
+ ++n;
+ if (n < 4)
+ return -2;
+ ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[3] = ch;
+ state->__count = 4;
+ if (n < 5)
+ return -2;
+ ch = t[i++];
+ *pwc = (wchar_t)((state->__value.__wchb[0] & 0x03) << 24)
+ | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 18)
+ | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 12)
+ | (wchar_t)((state->__value.__wchb[3] & 0x3f) << 6)
+ | (wchar_t)(ch & 0x3f);
+
+ state->__count = 0;
+ return i;
+ }
+ else if (ch >= 0xfc && ch <= 0xfd)
{
/* six-byte sequence */
- if (n >= 6)
- {
- wchar_t char2 = (wchar_t)*(t+1);
- wchar_t char3 = (wchar_t)*(t+2);
- wchar_t char4 = (wchar_t)*(t+3);
- wchar_t char5 = (wchar_t)*(t+4);
- wchar_t char6 = (wchar_t)*(t+5);
-
- if (char2 < 0x80 || char2 > 0xbf)
- return -1;
- if (char3 < 0x80 || char3 > 0xbf)
- return -1;
- if (char4 < 0x80 || char4 > 0xbf)
- return -1;
- if (char5 < 0x80 || char5 > 0xbf)
- return -1;
- if (char6 < 0x80 || char6 > 0xbf)
- return -1;
-
- if (char1 == 0xfc)
- {
- if (char2 < 0x84)
- /* overlong UTF-8 sequence */
- return -1;
- }
-
- *pwc = ((char1 & 0x01) << 30)
- | ((char2 & 0x3f) << 24)
- | ((char3 & 0x3f) << 18)
- | ((char4 & 0x3f) << 12)
- | ((char5 & 0x3f) << 6)
- | (char6 & 0x3f);
- return 6;
- }
- else
- return -2;
- }
+ int ch2;
+ if (sizeof(wchar_t) < 4)
+ return -1; /* we can't store such a value */
+ state->__value.__wchb[0] = ch;
+ if (state->__count == 0)
+ state->__count = 1;
+ else
+ ++n;
+ if (n < 2)
+ return -2;
+ ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
+ if (state->__value.__wchb[0] == 0xfc && ch < 0x84)
+ /* overlong UTF-8 sequence */
+ return -1;
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[1] = ch;
+ if (state->__count == 1)
+ state->__count = 2;
+ else
+ ++n;
+ if (n < 3)
+ return -2;
+ ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[2] = ch;
+ if (state->__count == 2)
+ state->__count = 3;
+ else
+ ++n;
+ if (n < 4)
+ return -2;
+ ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ state->__value.__wchb[3] = ch;
+ if (state->__count == 3)
+ state->__count = 4;
+ else
+ ++n;
+ if (n < 5)
+ return -2;
+ if (n == 5)
+ return -1; /* at this point we can't save enough to restart */
+ ch = t[i++];
+ if (ch < 0x80 || ch > 0xbf)
+ return -1;
+ ch2 = t[i++];
+ *pwc = (wchar_t)((state->__value.__wchb[0] & 0x01) << 30)
+ | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 24)
+ | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 18)
+ | (wchar_t)((state->__value.__wchb[3] & 0x3f) << 12)
+ | (wchar_t)((ch & 0x3f) << 6)
+ | (wchar_t)(ch2 & 0x3f);
+
+ state->__count = 0;
+ return i;
+ }
else
- return -1;
+ return -1;
}
else if (!strcmp (r->_current_locale, "C-SJIS"))
{
- int char1;
+ int ch;
+ int i = 0;
if (s == NULL)
return 0; /* not state-dependent */
- char1 = *t;
- if (_issjis1 (char1))
- {
- int char2 = t[1];
- if (n <= 1)
- return -2;
- if (_issjis2 (char2))
- {
- *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
- return 2;
- }
- else
- return -1;
- }
+ ch = t[i++];
+ if (state->__count == 0)
+ {
+ if (_issjis1 (ch))
+ {
+ state->__value.__wchb[0] = ch;
+ state->__count = 1;
+ if (n <= 1)
+ return -2;
+ ch = t[i++];
+ }
+ }
+ if (state->__count == 1)
+ {
+ if (_issjis2 (ch))
+ {
+ *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
+ state->__count = 0;
+ return i;
+ }
+ else
+ return -1;
+ }
}
else if (!strcmp (r->_current_locale, "C-EUCJP"))
{
- int char1;
+ int ch;
+ int i = 0;
if (s == NULL)
return 0; /* not state-dependent */
- char1 = *t;
- if (_iseucjp (char1))
- {
- int char2 = t[1];
- if (n <= 1)
- return -2;
- if (_iseucjp (char2))
- {
- *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
- return 2;
- }
- else
- return -1;
- }
+ ch = t[i++];
+ if (state->__count == 0)
+ {
+ if (_iseucjp (ch))
+ {
+ state->__value.__wchb[0] = ch;
+ state->__count = 1;
+ if (n <= 1)
+ return -2;
+ ch = t[i++];
+ }
+ }
+ if (state->__count == 1)
+ {
+ if (_iseucjp (ch))
+ {
+ *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
+ state->__count = 0;
+ return i;
+ }
+ else
+ return -1;
+ }
}
else if (!strcmp (r->_current_locale, "C-JIS"))
{
@@ -301,15 +373,16 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
JIS_ACTION action;
JIS_CHAR_TYPE ch;
unsigned char *ptr;
- int i, curr_ch;
+ unsigned int i;
+ int curr_ch;
if (s == NULL)
{
- state->__count = 0;
+ state->__state = ASCII;
return 1; /* state-dependent */
}
- curr_state = (state->__count == 0 ? ASCII : JIS);
+ curr_state = state->__state;
ptr = t;
for (i = 0; i < n; ++i)
@@ -353,23 +426,21 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
case NOOP:
break;
case EMPTY:
- state->__count = 0;
+ state->__state = ASCII;
*pwc = (wchar_t)0;
- return i;
+ return 0;
case COPY_A:
- state->__count = 0;
+ state->__state = ASCII;
*pwc = (wchar_t)*ptr;
return (i + 1);
- case COPY_J:
- state->__count = 0;
- *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
- return (i + 1);
+ case COPY_J1:
+ state->__value.__wchb[0] = t[i];
+ break;
case COPY_J2:
- state->__count = 1;
- *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
- return (ptr - t) + 2;
+ state->__state = JIS;
+ *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
+ return (i + 1);
case MAKE_A:
- case MAKE_J:
ptr = (char *)(t + i + 1);
break;
case ERROR:
@@ -379,6 +450,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
}
+ state->__state = curr_state;
return -2; /* n < bytes needed */
}
#endif /* MB_CAPABLE */
diff --git a/newlib/libc/stdlib/wcrtomb.c b/newlib/libc/stdlib/wcrtomb.c
index f68533c..0eb59ca 100644
--- a/newlib/libc/stdlib/wcrtomb.c
+++ b/newlib/libc/stdlib/wcrtomb.c
@@ -5,21 +5,45 @@
#include <errno.h>
size_t
-wcrtomb(char *s, wchar_t wc, mbstate_t *ps)
+_DEFUN (_wcrtomb_r, (ptr, s, wc, ps),
+ struct _reent *ptr _AND
+ char *s _AND
+ wchar_t wc _AND
+ mbstate_t *ps)
{
int retval = 0;
- _REENT_CHECK_MISC(_REENT);
+ char buf[10];
+
+#ifdef MB_CAPABLE
+ if (ps == NULL)
+ {
+ _REENT_CHECK_MISC(ptr);
+ ps = &(_REENT_WCRTOMB_STATE(ptr));
+ }
+#endif
if (s == NULL)
- retval = _wctomb_r (_REENT, "", wc, ps);
+ retval = _wctomb_r (ptr, buf, L'\0', ps);
else
- retval = _wctomb_r (_REENT, s, wc, ps);
+ retval = _wctomb_r (ptr, s, wc, ps);
if (retval == -1)
{
- _REENT->_errno = EILSEQ;
+ ps->__count = 0;
+ ptr->_errno = EILSEQ;
return (size_t)(-1);
}
else
return (size_t)retval;
}
+
+#ifndef _REENT_ONLY
+size_t
+_DEFUN (wcrtomb, (s, wc, ps),
+ char *s _AND
+ wchar_t wc _AND
+ mbstate_t *ps)
+{
+ return _wcrtomb_r (_REENT, s, wc, ps);
+}
+#endif /* !_REENT_ONLY */
diff --git a/newlib/libc/stdlib/wcsrtombs.c b/newlib/libc/stdlib/wcsrtombs.c
index 22512c0..4313478 100644
--- a/newlib/libc/stdlib/wcsrtombs.c
+++ b/newlib/libc/stdlib/wcsrtombs.c
@@ -5,18 +5,74 @@
#include <errno.h>
size_t
-wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps)
+_DEFUN (_wcsrtombs_r, (r, dst, src, len, ps),
+ struct _reent *r _AND
+ char *dst _AND
+ const wchar_t **src _AND
+ size_t len _AND
+ mbstate_t *ps)
{
- int retval = 0;
- _REENT_CHECK_MISC(_REENT);
+ char *ptr = dst;
+ char buff[10];
+ int i, n;
+ int count;
+ wint_t wch;
- retval = _wcstombs_r (_REENT, dst, *src, len, ps);
+#ifdef MB_CAPABLE
+ if (ps == NULL)
+ {
+ _REENT_CHECK_MISC(r);
+ ps = &(_REENT_WCSRTOMBS_STATE(r));
+ }
+#endif
- if (retval == -1)
+ n = (int)len;
+
+ while (n > 0)
{
- _REENT->_errno = EILSEQ;
- return (size_t)(-1);
+ wchar_t *pwcs = (wchar_t *)(*src);
+ int count = ps->__count;
+ wint_t wch = ps->__value.__wch;
+ int bytes = _wctomb_r (r, buff, *pwcs, ps);
+ if (bytes == -1)
+ {
+ r->_errno = EILSEQ;
+ ps->__count = 0;
+ return (size_t)-1;
+ }
+ if (bytes <= n)
+ {
+ for (i = 0; i < bytes; ++i)
+ *ptr++ = buff[i];
+
+ if (*pwcs == 0x00)
+ {
+ *src = NULL;
+ ps->__count = 0;
+ return (size_t)(ptr - dst - 1);
+ }
+ ++(*src);
+ }
+ else
+ {
+ /* not enough room, we must back up state to before _wctomb_r call */
+ ps->__count = count;
+ ps->__value.__wch = wch;
+ }
+ n -= bytes;
}
- else
- return (size_t)retval;
+
+ return (size_t)(ptr - dst);
+}
+
+#ifndef _REENT_ONLY
+size_t
+_DEFUN (wcsrtombs, (dst, src, len, ps),
+ char *dst _AND
+ const wchar_t **src _AND
+ size_t len _AND
+ mbstate_t *ps)
+{
+ return _wcsrtombs_r (_REENT, dst, src, len, ps);
}
+#endif /* !_REENT_ONLY */
diff --git a/newlib/libc/stdlib/wcstombs.c b/newlib/libc/stdlib/wcstombs.c
index c984746..f02d4ab 100644
--- a/newlib/libc/stdlib/wcstombs.c
+++ b/newlib/libc/stdlib/wcstombs.c
@@ -60,22 +60,22 @@ _DEFUN (wcstombs, (s, pwcs, n),
size_t n)
{
#ifdef MB_CAPABLE
- mbstate_t state;
- state.__count = 0;
-
- return _wcstombs_r (_REENT, s, pwcs, n, &state);
+ mbstate_t state;
+ state.__count = 0;
+
+ return _wcstombs_r (_REENT, s, pwcs, n, &state);
#else /* not MB_CAPABLE */
- int count = 0;
-
- if (n != 0) {
- do {
- if ((*s++ = (char) *pwcs++) == 0)
- break;
- count++;
- } while (--n != 0);
- }
-
- return count;
+ int count = 0;
+
+ if (n != 0) {
+ do {
+ if ((*s++ = (char) *pwcs++) == 0)
+ break;
+ count++;
+ } while (--n != 0);
+ }
+
+ return count;
#endif /* not MB_CAPABLE */
}
diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c
index 7cd84b6..4bcabee 100644
--- a/newlib/libc/stdlib/wctomb_r.c
+++ b/newlib/libc/stdlib/wctomb_r.c
@@ -4,6 +4,9 @@
#include <locale.h>
#include "mbctype.h"
+/* for some conversions, we use the __count field as a place to store a state value */
+#define __state __count
+
int
_DEFUN (_wctomb_r, (r, s, wchar, state),
struct _reent *r _AND
@@ -126,10 +129,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
/* first byte is non-zero..validate multi-byte char */
if (_isjis (char1) && _isjis (char2))
{
- if (state->__count == 0)
+ if (state->__state == 0)
{
/* must switch from ASCII to JIS state */
- state->__count = 1;
+ state->__state = 1;
*s++ = ESC_CHAR;
*s++ = '$';
*s++ = 'B';
@@ -144,10 +147,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
}
else
{
- if (state->__count != 0)
+ if (state->__state != 0)
{
/* must switch from JIS to ASCII state */
- state->__count = 0;
+ state->__state = 0;
*s++ = ESC_CHAR;
*s++ = '(';
*s++ = 'B';