diff options
-rw-r--r-- | src/locale/iconv.c | 43 |
1 file changed, 40 insertions, 3 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c index 1784dc9..c5dd122 100644 --- a/src/locale/iconv.c +++ b/src/locale/iconv.c @@ -16,6 +16,9 @@ #define WCHAR_T 0306 #define US_ASCII 0307 #define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 #define EUC_JP 0320 #define SHIFT_JIS 0321 #define ISO2022_JP 0322 @@ -35,13 +38,16 @@ static const unsigned char charmaps[] = "utf8\0char\0\0\310" "wchart\0\0\306" -"ucs2\0ucs2be\0\0\304" +"ucs2be\0\0\304" "ucs2le\0\0\305" -"utf16\0utf16be\0\0\302" +"utf16be\0\0\302" "utf16le\0\0\301" -"ucs4\0ucs4be\0utf32\0utf32be\0\0\300" +"ucs4be\0utf32be\0\0\300" "ucs4le\0utf32le\0\0\303" "ascii\0usascii\0iso646\0iso646us\0\0\307" +"utf16\0\0\312" +"ucs4\0utf32\0\0\313" +"ucs2\0\0\314" "eucjp\0\0\320" "shiftjis\0sjis\0\0\321" "iso2022jp\0\0\322" @@ -145,6 +151,9 @@ iconv_t iconv_open(const char *to, const char *from) iconv_t cd = combine_to_from(t, f); switch (charmaps[f]) { + case UTF_16: + case UTF_32: + case UCS2: case ISO2022_JP: scd = malloc(sizeof *scd); if (!scd) return (iconv_t)-1; @@ -285,6 +294,31 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri c = ((c-0xd7c0)<<10) + (d-0xdc00); } break; + case UCS2: + case UTF_16: + l = 0; + if (!scd->state) { + if (*inb < 2) goto starved; + c = get_16((void *)*in, 0); + scd->state = type==UCS2 + ? c==0xfffe ? UCS2LE : UCS2BE + : c==0xfffe ? UTF_16LE : UTF_16BE; + if (c == 0xfffe || c == 0xfeff) + l = 2; + } + type = scd->state; + continue; + case UTF_32: + l = 0; + if (!scd->state) { + if (*inb < 4) goto starved; + c = get_32((void *)*in, 0); + scd->state = c==0xfffe0000 ? 
UTF_32LE : UTF_32BE; + if (c == 0xfffe0000 || c == 0xfeff) + l = 4; + } + type = scd->state; + continue; case SHIFT_JIS: if (c < 128) break; if (c-0xa1 <= 0xdf-0xa1) { @@ -589,8 +623,11 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri *(*out)++ = 'B'; *outb -= 8; break; + case UCS2: + totype = UCS2BE; case UCS2BE: case UCS2LE: + case UTF_16: case UTF_16BE: case UTF_16LE: if (c < 0x10000 || type-UCS2BE < 2U) { |