/* Conversion module for ISO-2022-JP and ISO-2022-JP-2. Copyright (C) 1998, 1999, 2000-2002, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ #include <assert.h> #include <dlfcn.h> #include <gconv.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include "jis0201.h" #include "jis0208.h" #include "jis0212.h" #include "gb2312.h" #include "ksc5601.h" struct gap { uint16_t start; uint16_t end; int32_t idx; }; #include "iso8859-7jp.h" /* This makes obvious what everybody knows: 0x1b is the Esc character. */ #define ESC 0x1b /* We provide our own initialization and destructor function. */ #define DEFINE_INIT 0 #define DEFINE_FINI 0 /* Definitions used in the body of the `gconv' function. 
*/
/* Names given to the two conversion loops; each is later installed as
   LOOPFCT before the loop template is included.  */
#define FROM_LOOP from_iso2022jp_loop
#define TO_LOOP to_iso2022jp_loop
/* Size limits of the two loops.  gconv_init copies these into the
   __min_needed_* and __max_needed_* fields of the conversion step.
   "FROM"/"TO" after NEEDED refers to the input and output side of the
   respective loop (presumably counted in bytes; the UCS4 side is
   always 4).  */
#define FROM_LOOP_MIN_NEEDED_FROM 1
#define FROM_LOOP_MAX_NEEDED_FROM 4
#define FROM_LOOP_MIN_NEEDED_TO 4
#define FROM_LOOP_MAX_NEEDED_TO 4
#define TO_LOOP_MIN_NEEDED_FROM 4
#define TO_LOOP_MAX_NEEDED_FROM 4
#define TO_LOOP_MIN_NEEDED_TO 1
#define TO_LOOP_MAX_NEEDED_TO 6
/* True when the step converts from ISO-2022-JP(-2).  */
#define FROM_DIRECTION (dir == from_iso2022jp)
/* Local variables shared by both loops: direction and variant are read
   from the step's private data, `setp' points at the __count word of
   the conversion state, and `save_set' is the scratch slot used by
   SAVE_RESET_STATE.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
  int save_set; \
  int *setp = &data->__statep->__count;
/* Extra arguments passed to the loop functions; they match
   EXTRA_LOOP_DECLS.  */
#define EXTRA_LOOP_ARGS , var, setp

/* Direction of the transformation.  illegal_dir marks "not determined".  */
enum direction
{
  illegal_dir,
  to_iso2022jp,
  from_iso2022jp
};

/* We handle ISO-2022-JP and ISO-2022-JP-2 here.  illegal_var marks
   "not determined".  */
enum variant
{
  illegal_var,
  iso2022jp,
  iso2022jp2
};

/* Private data of a conversion step.  It is allocated by gconv_init,
   stored in the step's __data member and released by gconv_end.  */
struct iso2022jp_data
{
  enum direction dir;
  enum variant var;
};

/* The COUNT element of the state keeps track of the currently selected
   character set.  The value occupies bits 3..5 (see CURRENT_SEL_MASK).
   The possible values are:  */
enum
{
  ASCII_set = 0,
  JISX0208_1978_set = 1 << 3,
  JISX0208_1983_set = 2 << 3,
  JISX0201_Roman_set = 3 << 3,
  JISX0201_Kana_set = 4 << 3,
  GB2312_set = 5 << 3,
  KSC5601_set = 6 << 3,
  JISX0212_set = 7 << 3,
  CURRENT_SEL_MASK = 7 << 3
};

/* The second value stored is the designation of the G2 set.  It
   occupies bits 6..7 (see CURRENT_ASSIGN_MASK).  The following values
   are possible:  */
enum
{
  UNSPECIFIED_set = 0,
  ISO88591_set = 1 << 6,
  ISO88597_set = 2 << 6,
  CURRENT_ASSIGN_MASK = 3 << 6
};

/* The third value, only used during conversion from Unicode to ISO-2022-JP-2, describes the language tag parsing status. The possible values are as follows. Values >= TAG_language are temporary tag parsing states.
*/ enum { TAG_none = 0, TAG_language = 4 << 8, TAG_language_j = 5 << 8, TAG_language_ja = 1 << 8, TAG_language_k = 6 << 8, TAG_language_ko = 2 << 8, TAG_language_z = 7 << 8, TAG_language_zh = 3 << 8, CURRENT_TAG_MASK = 7 << 8 }; extern int gconv_init (struct __gconv_step *step); int gconv_init (struct __gconv_step *step) { /* Determine which direction. */ struct iso2022jp_data *new_data; enum direction dir = illegal_dir; enum variant var = illegal_var; int result; if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0) { dir = from_iso2022jp; var = iso2022jp; } else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0) { dir = to_iso2022jp; var = iso2022jp; } else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0) { dir = from_iso2022jp; var = iso2022jp2; } else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0) { dir = to_iso2022jp; var = iso2022jp2; } result = __GCONV_NOCONV; if (__builtin_expect (dir, from_iso2022jp) != illegal_dir) { new_data = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data)); result = __GCONV_NOMEM; if (new_data != NULL) { new_data->dir = dir; new_data->var = var; step->__data = new_data; if (dir == from_iso2022jp) { step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM; step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM; step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO; step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO; } else { step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM; step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM; step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO; step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO; } /* Yes, this is a stateful encoding. */ step->__stateful = 1; result = __GCONV_OK; } } return result; } extern void gconv_end (struct __gconv_step *data); void gconv_end (struct __gconv_step *data) { free (data->__data); } /* Since this is a stateful encoding we have to provide code which resets the output state to the initial state. This has to be done during the flushing. 
*/
/* EMIT_SHIFT_TO_INIT does nothing when the state word, ignoring its low
   three bits, already equals ASCII_set.  Otherwise:
   - when converting from ISO-2022-JP, or when ASCII is already the
     selected set (only a G2 designation or tag is pending), just the
     state word is reset, keeping its low three bits;
   - else the three bytes ESC ( B are written to `outbuf' and the state
     is reset.  If fewer than three bytes are free, `status' is set to
     __GCONV_FULL_OUTPUT and the state is left unchanged.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */ \
  (void) var; \
  \
  if ((data->__statep->__count & ~7) != ASCII_set) \
    { \
      if (dir == from_iso2022jp \
          || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set) \
        { \
          /* It's easy, we don't have to emit anything, we just reset the \
             state for the input.  Note that this also clears the G2 \
             designation.  */ \
          data->__statep->__count &= 7; \
          data->__statep->__count |= ASCII_set; \
        } \
      else \
        { \
          /* We are not in the initial state.  To switch back we have \
             to emit the sequence `Esc ( B'.  */ \
          if (__builtin_expect (outbuf + 3 > outend, 0)) \
            /* We don't have enough room in the output buffer.  */ \
            status = __GCONV_FULL_OUTPUT; \
          else \
            { \
              /* Write out the shift sequence.  */ \
              *outbuf++ = ESC; \
              *outbuf++ = '('; \
              *outbuf++ = 'B'; \
              /* Note that this also clears the G2 designation.  */ \
              data->__statep->__count &= 7; \
              data->__statep->__count |= ASCII_set; \
            } \
        } \
    }

/* Since we might have to reset input pointer we must be able to save
   and restore the state.  A true argument copies *setp into `save_set',
   a false argument copies it back.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_set = *setp; \
  else \
    *setp = save_set


/* First define the conversion function from ISO-2022-JP to UCS4.

   Each pass through BODY looks at the byte at `inptr':
   - a recognized designation escape sequence only updates `set' (or
     `set2' for the G2 designations) and continues with the next input;
   - ESC N (ISO-2022-JP-2 only) decodes the following byte through the
     currently designated G2 set;
   - any other byte is decoded through the currently selected set.
   A decoded character is stored with put32 and takes four output
   bytes.  */
#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    /* Recognize escape sequences.  */ \
    if (__builtin_expect (ch, 0) == ESC) \
      { \
        /* We now must be prepared to read two to three more \
           characters.  If we have a match in the first character but \
           then the input buffer ends we terminate with an error since \
           we must not risk missing an escape sequence just because it \
           is not entirely in the current input buffer.  */ \
        if (__builtin_expect (inptr + 2 >= inend, 0) \
            || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
                && __builtin_expect (inptr + 3 >= inend, 0))) \
          { \
            /* Not enough input available.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        if (inptr[1] == '(') \
          { \
            if (inptr[2] == 'B') \
              { \
                /* ASCII selected.  */ \
                set = ASCII_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'J') \
              { \
                /* JIS X 0201 Roman selected.  */ \
                set = JISX0201_Roman_set; \
                inptr += 3; \
                continue; \
              } \
            else if (var == iso2022jp2 && inptr[2] == 'I') \
              { \
                /* JIS X 0201 Kana selected.  */ \
                set = JISX0201_Kana_set; \
                inptr += 3; \
                continue; \
              } \
          } \
        else if (inptr[1] == '$') \
          { \
            if (inptr[2] == '@') \
              { \
                /* JIS X 0208-1978 selected.  */ \
                set = JISX0208_1978_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'B') \
              { \
                /* JIS X 0208-1983 selected.  */ \
                set = JISX0208_1983_set; \
                inptr += 3; \
                continue; \
              } \
            else if (var == iso2022jp2) \
              { \
                if (inptr[2] == 'A') \
                  { \
                    /* GB 2312-1980 selected.  */ \
                    set = GB2312_set; \
                    inptr += 3; \
                    continue; \
                  } \
                else if (inptr[2] == '(') \
                  { \
                    if (inptr[3] == 'C') \
                      { \
                        /* KSC 5601-1987 selected.  */ \
                        set = KSC5601_set; \
                        inptr += 4; \
                        continue; \
                      } \
                    else if (inptr[3] == 'D') \
                      { \
                        /* JIS X 0212-1990 selected.  */ \
                        set = JISX0212_set; \
                        inptr += 4; \
                        continue; \
                      } \
                  } \
              } \
          } \
        else if (var == iso2022jp2 && inptr[1] == '.') \
          { \
            if (inptr[2] == 'A') \
              { \
                /* ISO 8859-1-GR selected.  */ \
                set2 = ISO88591_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'F') \
              { \
                /* ISO 8859-7-GR selected.  */ \
                set2 = ISO88597_set; \
                inptr += 3; \
                continue; \
              } \
          } \
      } \
    \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
      { \
        if (set2 == ISO88591_set) \
          { \
            ch = inptr[2] | 0x80; \
            inptr += 3; \
          } \
        else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
          { \
            /* We use the table from the ISO 8859-7 module.  */ \
            if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
            ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
            if (ch == 0) \
              STANDARD_FROM_LOOP_ERR_HANDLER (3); \
            inptr += 3; \
          } \
        else \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    else if (ch >= 0x80) \
      { \
        STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
      /* Almost done, just advance the input pointer.  */ \
      ++inptr; \
    else if (set == JISX0201_Roman_set) \
      { \
        /* Use the JIS X 0201 table.  */ \
        ch = jisx0201_to_ucs4 (ch); \
        if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0)) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++inptr; \
      } \
    else if (set == JISX0201_Kana_set) \
      { \
        /* Use the JIS X 0201 table; 0x80 is added to the byte before \
           the lookup.  */ \
        ch = jisx0201_to_ucs4 (ch + 0x80); \
        if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0)) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++inptr; \
      } \
    else \
      { \
        if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
          /* XXX I don't have the tables for these two old variants of \
             JIS X 0208.  Therefore I'm using the tables for JIS X \
             0208-1990.  If somebody has problems with this please \
             provide the appropriate tables.  */ \
          ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
        else if (set == JISX0212_set) \
          /* Use the JIS X 0212 table.  */ \
          ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
        else if (set == GB2312_set) \
          /* Use the GB 2312 table.  */ \
          ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
        else \
          { \
            assert (set == KSC5601_set); \
            \
            /* Use the KSC 5601 table.  */ \
            ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
          } \
        \
        if (__builtin_expect (ch == 0, 0)) \
          { \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        else if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0)) \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    \
    put32 (outptr, ch); \
    outptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
/* The loop works on local copies of the selected set and the G2
   designation taken from *setp; UPDATE_PARAMS writes them back.  */
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
                    int set2 = *setp & CURRENT_ASSIGN_MASK
#define UPDATE_PARAMS *setp = set | set2
#include <iconv/loop.c>


/* Next, define the other direction.  */

/* Families of character sets that are tried when a character cannot be
   written with the currently selected set.  `none' is zero and
   therefore ends a conversion list.  */
enum conversion
{
  none = 0,
  european,
  japanese,
  chinese,
  korean,
  other
};

/* A datatype for conversion lists.  A list packs up to five `enum
   conversion' values, three bits each, with the first entry in the
   lowest bits.  CVLIST_FIRST extracts the first entry and CVLIST_REST
   drops it.  */
typedef unsigned int cvlist_t;
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
#define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
#define CVLIST_REST(cvl) ((cvl) >> 3)
/* Search order per language tag; the table is indexed with `tag >> 8'.  */
static const cvlist_t conversion_lists[4] =
{
  /* TAG_none */ CVLIST (japanese, european, chinese, korean, other),
  /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
  /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
  /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
};

/* Conversion from UCS4 to ISO-2022-JP and ISO-2022-JP-2.

   Each pass through BODY reads one character with get32 and
   1. for ISO-2022-JP-2, consumes Unicode tag characters, which only
      update the language tag state `tag';
   2. tries to write the character with the currently selected set,
      unless the language tag prefers a different one;
   3. failing that, and with no language tag active, tries the
      currently designated G2 set through ESC N;
   4. failing that, emits the escape sequence for another set: ASCII for
      characters up to 0x7f, otherwise the first set of the conversion
      list that can represent the character.
   `written' holds __UNKNOWN_10646_CHAR while no attempt has succeeded.
   The input pointer is advanced by four at the end of the body.  */
#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP
#define BODY \
  { \
    uint32_t ch; \
    size_t written; \
    \
    ch = get32 (inptr); \
    \
    if (var == iso2022jp2) \
      { \
        /* Handle Unicode tag characters (range U+E0000..U+E007F).  */ \
        if (__builtin_expect ((ch >> 7) == (0xe0000 >> 7), 0)) \
          { \
            ch &= 0x7f; \
            if (ch >= 'A' && ch <= 'Z') \
              ch += 'a' - 'A'; \
            if (ch == 0x01) \
              tag = TAG_language; \
            else if (ch == 'j' && tag == TAG_language) \
              tag = TAG_language_j; \
            else if (ch == 'a' && tag == TAG_language_j) \
              tag = TAG_language_ja; \
            else if (ch == 'k' && tag == TAG_language) \
              tag = TAG_language_k; \
            else if (ch == 'o' && tag == TAG_language_k) \
              tag = TAG_language_ko; \
            else if (ch == 'z' && tag == TAG_language) \
              tag = TAG_language_z; \
            else if (ch == 'h' && tag == TAG_language_z) \
              tag = TAG_language_zh; \
            else if (ch == 0x7f) \
              tag = TAG_none; \
            else \
              { \
                /* Other tag characters reset the tag parsing state (if the \
                   current state is a temporary state) or are ignored (if \
                   the current state is a stable one).  */ \
                if (tag >= TAG_language) \
                  tag = TAG_none; \
              } \
            \
            inptr += 4; \
            continue; \
          } \
        \
        /* Non-tag characters reset the tag parsing state, if the current \
           state is a temporary state.  */ \
        if (__builtin_expect (tag >= TAG_language, 0)) \
          tag = TAG_none; \
      } \
    \
    /* First see whether we can write the character using the currently \
       selected character set.  But ignore the selected character set if \
       the current language tag shows different preferences.  */ \
    if (set == ASCII_set) \
      { \
        /* Please note that the NUL byte is *not* matched if we are not \
           currently using the ASCII charset.  This is because we must \
           switch to the initial state whenever a NUL byte is written.  */ \
        if (ch <= 0x7f) \
          { \
            *outptr++ = ch; \
            written = 1; \
            \
            /* At the beginning of a line, G2 designation is cleared.  */ \
            if (var == iso2022jp2 && ch == 0x0a) \
              set2 = UNSPECIFIED_set; \
          } \
        else \
          written = __UNKNOWN_10646_CHAR; \
      } \
    /* ISO-2022-JP recommends to encode the newline character always in \
       ASCII since this allows a context-free interpretation of the \
       characters at the beginning of the next line.  Otherwise it would \
       have to be known whether the last line ended using ASCII or \
       JIS X 0201.  */ \
    else if (set == JISX0201_Roman_set \
             && (__builtin_expect (tag == TAG_none, 1) \
                 || tag == TAG_language_ja)) \
      { \
        unsigned char buf[1]; \
        written = ucs4_to_jisx0201 (ch, buf); \
        if (written != __UNKNOWN_10646_CHAR) \
          { \
            if (buf[0] > 0x20 && buf[0] < 0x80) \
              { \
                *outptr++ = buf[0]; \
                written = 1; \
              } \
            else \
              written = __UNKNOWN_10646_CHAR; \
          } \
      } \
    else if (set == JISX0201_Kana_set \
             && (__builtin_expect (tag == TAG_none, 1) \
                 || tag == TAG_language_ja)) \
      { \
        unsigned char buf[1]; \
        written = ucs4_to_jisx0201 (ch, buf); \
        if (written != __UNKNOWN_10646_CHAR) \
          { \
            if (buf[0] > 0xa0 && buf[0] < 0xe0) \
              { \
                *outptr++ = buf[0] - 0x80; \
                written = 1; \
              } \
            else \
              written = __UNKNOWN_10646_CHAR; \
          } \
      } \
    else \
      { \
        if ((set == JISX0208_1978_set || set == JISX0208_1983_set) \
            && (__builtin_expect (tag == TAG_none, 1) \
                || tag == TAG_language_ja)) \
          written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
        else if (set == JISX0212_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_ja)) \
          written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
        else if (set == GB2312_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_zh)) \
          written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
        else if (set == KSC5601_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_ko)) \
          written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
        else \
          written = __UNKNOWN_10646_CHAR; \
        \
        if (__builtin_expect (written == 0, 0)) \
          { \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        else if (written != __UNKNOWN_10646_CHAR) \
          outptr += written; \
      } \
    \
    if (written == __UNKNOWN_10646_CHAR \
        && __builtin_expect (tag == TAG_none, 1)) \
      { \
        if (set2 == ISO88591_set) \
          { \
            if (ch >= 0x80 && ch <= 0xff) \
              { \
                if (__builtin_expect (outptr + 3 > outend, 0)) \
                  { \
                    result = __GCONV_FULL_OUTPUT; \
                    break; \
                  } \
                \
                *outptr++ = ESC; \
                *outptr++ = 'N'; \
                *outptr++ = ch & 0x7f; \
                written = 3; \
              } \
          } \
        else if (set2 == ISO88597_set) \
          { \
            if (__builtin_expect (ch < 0xffff, 1)) \
              { \
                const struct gap *rp = from_idx; \
                \
                while (ch > rp->end) \
                  ++rp; \
                if (ch >= rp->start) \
                  { \
                    unsigned char res = \
                      iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
                    if (res != '\0') \
                      { \
                        if (__builtin_expect (outptr + 3 > outend, 0)) \
                          { \
                            result = __GCONV_FULL_OUTPUT; \
                            break; \
                          } \
                        \
                        *outptr++ = ESC; \
                        *outptr++ = 'N'; \
                        *outptr++ = res & 0x7f; \
                        written = 3; \
                      } \
                  } \
              } \
          } \
      } \
    \
    if (written == __UNKNOWN_10646_CHAR) \
      { \
        /* The attempts to use the currently selected character set \
           failed, either because the language tag changed, or because \
           the character requires a different character set, or because \
           the character is unknown. \
           The CJK character sets partially overlap when seen as subsets \
           of ISO 10646; therefore there is no single correct result. \
           We use a preference order which depends on the language tag.  */ \
        \
        if (ch <= 0x7f) \
          { \
            /* We must encode using ASCII.  First write out the \
               escape sequence.  */ \
            if (__builtin_expect (outptr + 3 > outend, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            \
            *outptr++ = ESC; \
            *outptr++ = '('; \
            *outptr++ = 'B'; \
            set = ASCII_set; \
            \
            if (__builtin_expect (outptr + 1 > outend, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            *outptr++ = ch; \
            \
            /* At the beginning of a line, G2 designation is cleared.  */ \
            if (var == iso2022jp2 && ch == 0x0a) \
              set2 = UNSPECIFIED_set; \
          } \
        else \
          { \
            /* Now it becomes difficult.  We must search the other \
               character sets one by one.  Use an ordered conversion \
               list that depends on the current language tag.  */ \
            cvlist_t conversion_list; \
            unsigned char buf[2]; \
            int res = __GCONV_ILLEGAL_INPUT; \
            \
            if (var == iso2022jp2) \
              conversion_list = conversion_lists[tag >> 8]; \
            else \
              conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
            \
            do \
              switch (CVLIST_FIRST (conversion_list)) \
                { \
                case european: \
                  \
                  /* Try ISO 8859-1 upper half.  */ \
                  if (ch >= 0x80 && ch <= 0xff) \
                    { \
                      if (set2 != ISO88591_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '.'; \
                          *outptr++ = 'A'; \
                          set2 = ISO88591_set; \
                        } \
                      \
                      if (__builtin_expect (outptr + 3 > outend, 0)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = ESC; \
                      *outptr++ = 'N'; \
                      *outptr++ = ch - 0x80; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  /* Try ISO 8859-7 upper half.  */ \
                  if (__builtin_expect (ch < 0xffff, 1)) \
                    { \
                      const struct gap *rp = from_idx; \
                      \
                      while (ch > rp->end) \
                        ++rp; \
                      if (ch >= rp->start) \
                        { \
                          unsigned char ch2 = \
                            iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
                          if (ch2 != '\0') \
                            { \
                              if (set2 != ISO88597_set) \
                                { \
                                  if (__builtin_expect (outptr + 3 > outend, \
                                                        0)) \
                                    { \
                                      res = __GCONV_FULL_OUTPUT; \
                                      break; \
                                    } \
                                  *outptr++ = ESC; \
                                  *outptr++ = '.'; \
                                  *outptr++ = 'F'; \
                                  set2 = ISO88597_set; \
                                } \
                              \
                              if (__builtin_expect (outptr + 3 > outend, 0)) \
                                { \
                                  res = __GCONV_FULL_OUTPUT; \
                                  break; \
                                } \
                              *outptr++ = ESC; \
                              *outptr++ = 'N'; \
                              *outptr++ = ch2 - 0x80; \
                              res = __GCONV_OK; \
                              break; \
                            } \
                        } \
                    } \
                  \
                  break; \
                  \
                case japanese: \
                  \
                  /* Try JIS X 0201 Roman.  */ \
                  written = ucs4_to_jisx0201 (ch, buf); \
                  if (written != __UNKNOWN_10646_CHAR \
                      && buf[0] > 0x20 && buf[0] < 0x80) \
                    { \
                      if (set != JISX0201_Roman_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '('; \
                          *outptr++ = 'J'; \
                          set = JISX0201_Roman_set; \
                        } \
                      \
                      if (__builtin_expect (outptr + 1 > outend, 0)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  /* Try JIS X 0208.  */ \
                  written = ucs4_to_jisx0208 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != JISX0208_1983_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = 'B'; \
                          set = JISX0208_1983_set; \
                        } \
                      \
                      if (__builtin_expect (outptr + 2 > outend, 0)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  if (__builtin_expect (var == iso2022jp, 0)) \
                    /* Don't use the other Japanese character sets.  */ \
                    break; \
                  \
                  /* Try JIS X 0212.  */ \
                  written = ucs4_to_jisx0212 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != JISX0212_set) \
                        { \
                          if (__builtin_expect (outptr + 4 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = '('; \
                          *outptr++ = 'D'; \
                          set = JISX0212_set; \
                        } \
                      \
                      if (__builtin_expect (outptr + 2 > outend, 0)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case chinese: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try GB 2312.  */ \
                  written = ucs4_to_gb2312 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != GB2312_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = 'A'; \
                          set = GB2312_set; \
                        } \
                      \
                      if (__builtin_expect (outptr + 2 > outend, 0)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case korean: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try KSC 5601.  */ \
                  written = ucs4_to_ksc5601 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != KSC5601_set) \
                        { \
                          if (__builtin_expect (outptr + 4 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = '('; \
                          *outptr++ = 'C'; \
                          set = KSC5601_set; \
                        } \
                      \
                      if (__builtin_expect (outptr + 2 > outend, 0)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case other: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try JIS X 0201 Kana.  This is not officially part \
                     of ISO-2022-JP-2, according to RFC 1554.  Therefore \
                     we try this only after all other attempts.  */ \
                  written = ucs4_to_jisx0201 (ch, buf); \
                  if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
                    { \
                      if (set != JISX0201_Kana_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '('; \
                          *outptr++ = 'I'; \
                          set = JISX0201_Kana_set; \
                        } \
                      \
                      if (__builtin_expect (outptr + 1 > outend, 0)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0] - 0x80; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                default: \
                  abort (); \
                } \
            while (res == __GCONV_ILLEGAL_INPUT \
                   && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
            \
            if (res == __GCONV_FULL_OUTPUT) \
              { \
                result = res; \
                break; \
              } \
            \
            if (res == __GCONV_ILLEGAL_INPUT) \
              { \
                STANDARD_TO_LOOP_ERR_HANDLER (4); \
              } \
          } \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
/* In this direction the loop additionally keeps a local copy of the
   language tag state.  REINIT_PARAMS reloads all three local copies
   from *setp and UPDATE_PARAMS writes them back.  */
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
                    int set2 = *setp & CURRENT_ASSIGN_MASK; \
                    int tag = *setp & CURRENT_TAG_MASK;
#define REINIT_PARAMS do \
                        { \
                          set = *setp & CURRENT_SEL_MASK; \
                          set2 = *setp & CURRENT_ASSIGN_MASK; \
                          tag = *setp & CURRENT_TAG_MASK; \
                        } \
                      while (0)
#define UPDATE_PARAMS *setp = set | set2 | tag
#include <iconv/loop.c>


/* Now define the toplevel functions.  */
#include <iconv/skeleton.c>