diff options
Diffstat (limited to 'wcsmbs/wcsrtombs.c')
-rw-r--r-- | wcsmbs/wcsrtombs.c | 95 |
1 files changed, 73 insertions, 22 deletions
diff --git a/wcsmbs/wcsrtombs.c b/wcsmbs/wcsrtombs.c index 9f10009..99ca6ac 100644 --- a/wcsmbs/wcsrtombs.c +++ b/wcsmbs/wcsrtombs.c @@ -1,6 +1,6 @@ /* Copyright (C) 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. -Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu> +Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -25,6 +25,18 @@ Boston, MA 02111-1307, USA. */ #endif +static const wchar_t encoding_mask[] = +{ + ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff +}; + +static const unsigned char encoding_byte[] = +{ + 0xc0, 0xe0, 0xf0, 0xf8, 0xfc +}; + +/* We don't need the state really because we don't have shift states + to maintain between calls to this function. */ static mbstate_t internal; size_t @@ -34,40 +46,79 @@ wcsrtombs (dst, src, len, ps) size_t len; mbstate_t *ps; { - size_t result = 0; + size_t written = 0; + const wchar_t *run = *src; if (ps == NULL) ps = &internal; - /*************************************************************\ - |* This is no complete implementation. While the multi-byte *| - |* character handling is not finished this will do. *| - \*************************************************************/ + if (dst == NULL) + /* The LEN parameter has to be ignored if we don't actually write + anything. */ + len = ~0; - while (len > 0 && **src != L'\0') + while (written < len) { - if ((**src & ~0xff) != 0) + wchar_t wc = *run++; + + if (wc < 0 || wc > 0x7fffffff) { + /* This is no correct ISO 10646 character. */ errno = EILSEQ; return (size_t) -1; } - if (dst != NULL) - dst[result] = (char) **src; - ++result; - ++(*src); - --len; - } - - if (len > 0) - { - if (dst != NULL) + if (wc == L'\0') + { + /* Found the end. */ + if (dst != NULL) + *dst = '\0'; + *src = NULL; + return written; + } + else if (wc < 0x80) { - dst[result] = '\0'; - *ps = 0; + /* It's an one byte sequence. */ + if (dst != NULL) + *dst++ = (char) wc; + ++written; + } + else + { + size_t step; + + for (step = 2; step < 6; ++step) + if ((wc & encoding_mask[step - 2]) == 0) + break; + + if (written + step >= len) + /* Too long. */ + break; + + if (dst != NULL) + { + size_t cnt = step; + + dst[0] = encoding_byte[cnt - 2]; + + --cnt; + do + { + dst[cnt] = 0x80 | (wc & 0x3f); + wc >>= 6; + } + while (--cnt > 0); + dst[0] |= wc; + + dst += step; + } + + written += step; } - *src = NULL; } - return result; + /* Store position of first unprocessed word. */ + *src = run; + + return written; } |