aboutsummaryrefslogtreecommitdiff
path: root/iconvdata
diff options
context:
space:
mode:
Diffstat (limited to 'iconvdata')
-rw-r--r--iconvdata/iso-2022-jp.c276
1 files changed, 156 insertions, 120 deletions
diff --git a/iconvdata/iso-2022-jp.c b/iconvdata/iso-2022-jp.c
index a7ec09b..10515d2 100644
--- a/iconvdata/iso-2022-jp.c
+++ b/iconvdata/iso-2022-jp.c
@@ -95,7 +95,14 @@ enum
JISX0201_Kana_set,
GB2312_set,
KSC5601_set,
- JISX0212_set,
+ JISX0212_set
+};
+
+/* The second value stored is the designation of the G2 set. The following
+ values are possible: */
+enum
+{
+ UNSPECIFIED_set = 0,
ISO88591_set,
ISO88597_set
};
@@ -187,12 +194,13 @@ gconv_end (struct gconv_step *data)
\
if (dir == from_iso2022jp) \
/* It's easy, we don't have to emit anything, we just reset the \
- state for the input. */ \
+ state for the input. Note that this also clears the G2 \
+ designation. */ \
data->statep->count = ASCII_set; \
else \
{ \
char *outbuf = data->outbuf; \
- \
+ \
/* We are not in the initial state. To switch back we have \
to emit the sequence `Esc ( B'. */ \
if (outbuf + 3 > data->outbufend) \
@@ -205,6 +213,7 @@ gconv_end (struct gconv_step *data)
*outbuf++ = '('; \
*outbuf++ = 'B'; \
data->outbuf = outbuf; \
+ /* Note that this also clears the G2 designation. */ \
data->statep->count = ASCII_set; \
} \
} \
@@ -319,23 +328,50 @@ gconv_end (struct gconv_step *data)
if (inptr[2] == 'A') \
{ \
/* ISO 8859-1-GR selected. */ \
- set = ISO88591_set; \
+ set2 = ISO88591_set; \
inptr += 3; \
continue; \
} \
else if (inptr[2] == 'F') \
{ \
/* ISO 8859-7-GR selected. */ \
- set = ISO88597_set; \
+ set2 = ISO88597_set; \
inptr += 3; \
continue; \
} \
} \
} \
\
- if (set == ASCII_set \
- || (var < ISO88591_set && (ch < 0x21 || ch == 0x7f)) \
- || (var >= ISO88591_set && ch < 0x20)) \
+ if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
+ { \
+ if (set2 == ISO88591_set) \
+ { \
+ ch = inptr[2] | 0x80; \
+ inptr += 3; \
+ } \
+ else if (set2 == ISO88597_set) \
+ { \
+ /* We use the table from the ISO 8859-7 module. */ \
+ if (inptr[2] < 0x20 || inptr[2] > 0x80) \
+ { \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
+ if (ch == 0) \
+ { \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ inptr += 3; \
+ } \
+ else \
+ { \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ } \
+ else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
/* Almost done, just advance the input pointer. */ \
++inptr; \
else if (set == JISX0201_Roman_set) \
@@ -360,24 +396,6 @@ gconv_end (struct gconv_step *data)
} \
++inptr; \
} \
- else if (set == ISO88591_set) \
- { \
- /* This is quite easy. All characters are defined and the \
- ISO 10646 value is computed by adding 0x80. */ \
- ch |= 0x80; \
- ++inptr; \
- } \
- else if (set == ISO88597_set) \
- { \
- /* We use the table from the ISO 8859-7 module. */ \
- ch = iso88597_to_ucs4[(ch & 0x7f) - 0x20]; \
- if (ch == 0) \
- { \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- ++inptr; \
- } \
else \
{ \
if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
@@ -419,8 +437,8 @@ gconv_end (struct gconv_step *data)
*((uint32_t *) outptr)++ = ch; \
}
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
-#define INIT_PARAMS int set = *setp
-#define UPDATE_PARAMS *setp = set
+#define INIT_PARAMS int set = *setp % 0x100, set2 = *setp / 0x100
+#define UPDATE_PARAMS *setp = (set2 << 8) + set
#include <iconv/loop.c>
@@ -448,6 +466,9 @@ gconv_end (struct gconv_step *data)
*outptr++ = ch; \
written = 1; \
} \
+ /* At the beginning of a line, G2 designation is cleared. */ \
+ if (var == iso2022jp2 && ch == 0x0a) \
+ set2 = UNSPECIFIED_set; \
} \
else if (set == JISX0201_Roman_set) \
{ \
@@ -473,30 +494,6 @@ gconv_end (struct gconv_step *data)
else \
written = UNKNOWN_10646_CHAR; \
} \
- else if (set == ISO88591_set) \
- { \
- if (ch >= 0x80 && ch <= 0xff) \
- { \
- *outptr++ = ch; \
- written = 1; \
- } \
- } \
- else if (set == ISO88597_set) \
- { \
- const struct gap *rp = from_idx; \
- \
- while (ch > rp->end) \
- ++rp; \
- if (ch >= rp->start) \
- { \
- unsigned char res = iso88597_from_ucs4[ch + rp->idx]; \
- if (res != '\0') \
- { \
- *outptr++ = res | 0x80; \
- written = 1; \
- } \
- } \
- } \
else \
{ \
if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
@@ -530,6 +527,38 @@ gconv_end (struct gconv_step *data)
\
if (written == UNKNOWN_10646_CHAR || written == 0) \
{ \
+ if (set2 == ISO88591_set) \
+ { \
+ if (ch >= 0x80 && ch <= 0xff) \
+ { \
+ *outptr++ = ESC; \
+ *outptr++ = 'N'; \
+ *outptr++ = ch & 0x7f; \
+ written = 3; \
+ } \
+ } \
+ else if (set2 == ISO88597_set) \
+ { \
+ const struct gap *rp = from_idx; \
+ \
+ while (ch > rp->end) \
+ ++rp; \
+ if (ch >= rp->start) \
+ { \
+ unsigned char res = iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
+ if (res != '\0') \
+ { \
+ *outptr++ = ESC; \
+ *outptr++ = 'N'; \
+ *outptr++ = res; \
+ written = 3; \
+ } \
+ } \
+ } \
+ } \
+ \
+ if (written == UNKNOWN_10646_CHAR || written == 0) \
+ { \
/* Either this is an unknown character or we have to switch \
the currently selected character set. The character sets \
do not code entirely separate parts of ISO 10646 and \
@@ -541,31 +570,25 @@ gconv_end (struct gconv_step *data)
later and now simply use a fixed order in which we test for \
availability */ \
\
- /* First test whether we have at least three more bytes for \
- the escape sequence. The two charsets which require four \
- bytes will be handled later. */ \
- if (NEED_LENGTH_TEST && outptr + 3 > outend) \
- { \
- result = GCONV_FULL_OUTPUT; \
- break; \
- } \
- \
if (ch <= 0x7f) \
{ \
/* We must encode using ASCII. First write out the \
escape sequence. */ \
- *outptr++ = ESC; \
- *outptr++ = '('; \
- *outptr++ = 'B'; \
- set = ASCII_set; \
- \
- if (NEED_LENGTH_TEST && outptr == outend) \
+ if (NEED_LENGTH_TEST && outptr + 4 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
+ *outptr++ = ESC; \
+ *outptr++ = '('; \
+ *outptr++ = 'B'; \
+ set = ASCII_set; \
*outptr++ = ch; \
+ \
+ /* At the beginning of a line, G2 designation is cleared. */ \
+ if (var == iso2022jp2 && ch == 0x0a) \
+ set2 = UNSPECIFIED_set; \
} \
else \
{ \
@@ -580,17 +603,16 @@ gconv_end (struct gconv_step *data)
if (written != UNKNOWN_10646_CHAR && buf[0] < 0x80) \
{ \
/* We use JIS X 0201. */ \
- *outptr++ = ESC; \
- *outptr++ = '('; \
- *outptr++ = 'J'; \
- set = JISX0201_Roman_set; \
- \
- if (NEED_LENGTH_TEST && outptr == outend) \
+ if (NEED_LENGTH_TEST && outptr + 4 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
+ *outptr++ = ESC; \
+ *outptr++ = '('; \
+ *outptr++ = 'J'; \
+ set = JISX0201_Roman_set; \
*outptr++ = buf[0]; \
} \
else \
@@ -599,17 +621,16 @@ gconv_end (struct gconv_step *data)
if (written != UNKNOWN_10646_CHAR) \
{ \
/* We use JIS X 0208. */ \
- *outptr++ = ESC; \
- *outptr++ = '$'; \
- *outptr++ = 'B'; \
- set = JISX0208_1983_set; \
- \
- if (NEED_LENGTH_TEST && outptr + 2 > outend) \
+ if (NEED_LENGTH_TEST && outptr + 5 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
+ *outptr++ = ESC; \
+ *outptr++ = '$'; \
+ *outptr++ = 'B'; \
+ set = JISX0208_1983_set; \
*outptr++ = buf[0]; \
*outptr++ = buf[1]; \
} \
@@ -625,7 +646,7 @@ gconv_end (struct gconv_step *data)
if (written != UNKNOWN_10646_CHAR) \
{ \
/* We use JIS X 0212. */ \
- if (NEED_LENGTH_TEST && outptr + 4 > outend) \
+ if (NEED_LENGTH_TEST && outptr + 6 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@@ -635,13 +656,6 @@ gconv_end (struct gconv_step *data)
*outptr++ = '('; \
*outptr++ = 'D'; \
set = JISX0212_set; \
- \
- if (NEED_LENGTH_TEST && outptr + 2 > outend) \
- { \
- result = GCONV_FULL_OUTPUT; \
- break; \
- } \
- \
*outptr++ = buf[0]; \
*outptr++ = buf[1]; \
} \
@@ -651,33 +665,33 @@ gconv_end (struct gconv_step *data)
if (written != UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
{ \
/* We use JIS X 0201. */ \
- *outptr++ = ESC; \
- *outptr++ = '('; \
- *outptr++ = 'I'; \
- set = JISX0201_Kana_set; \
- \
- if (NEED_LENGTH_TEST && outptr == outend) \
+ if (NEED_LENGTH_TEST && outptr + 4 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
+ *outptr++ = ESC; \
+ *outptr++ = '('; \
+ *outptr++ = 'I'; \
+ set = JISX0201_Kana_set; \
*outptr++ = buf[0] - 0x80; \
} \
else if (ch != 0xa5 && ch >= 0x80 && ch <= 0xff) \
{ \
/* ISO 8859-1 upper half. */ \
- *outptr++ = ESC; \
- *outptr++ = '.'; \
- *outptr++ = 'A'; \
- set = ISO88591_set; \
- \
- if (NEED_LENGTH_TEST && outptr == outend) \
+ if (NEED_LENGTH_TEST && outptr + 6 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
+ *outptr++ = ESC; \
+ *outptr++ = '.'; \
+ *outptr++ = 'A'; \
+ set2 = ISO88591_set; \
+ *outptr++ = ESC; \
+ *outptr++ = 'N'; \
*outptr++ = ch; \
} \
else \
@@ -686,28 +700,27 @@ gconv_end (struct gconv_step *data)
if (written != UNKNOWN_10646_CHAR) \
{ \
/* We use GB 2312. */ \
- *outptr++ = ESC; \
- *outptr++ = '$'; \
- *outptr++ = 'A'; \
- set = GB2312_set; \
- \
- if (NEED_LENGTH_TEST && outptr + 2 > outend) \
+ if (NEED_LENGTH_TEST && outptr + 5 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
+ *outptr++ = ESC; \
+ *outptr++ = '$'; \
+ *outptr++ = 'A'; \
+ set = GB2312_set; \
*outptr++ = buf[0]; \
*outptr++ = buf[1]; \
} \
else \
{ \
- written = ucs4_to_ksc5601 (ch, buf, 2); \
+ written = ucs4_to_ksc5601 (ch, buf, 2); \
if (written != UNKNOWN_10646_CHAR) \
{ \
/* We use KSC 5601. */ \
- if (NEED_LENGTH_TEST \
- && outptr + 4 > outend) \
+ if (NEED_LENGTH_TEST \
+ && outptr + 6 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@@ -717,21 +730,44 @@ gconv_end (struct gconv_step *data)
*outptr++ = '('; \
*outptr++ = 'C'; \
set = KSC5601_set; \
- \
- if (NEED_LENGTH_TEST \
- && outptr + 2 > outend) \
- { \
- result = GCONV_FULL_OUTPUT; \
- break; \
- } \
- \
*outptr++ = buf[0]; \
*outptr++ = buf[1]; \
} \
else \
{ \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
+ const struct gap *rp = from_idx; \
+ unsigned char gch = 0; \
+ \
+ while (ch > rp->end) \
+ ++rp; \
+ if (ch >= rp->start) \
+ { \
+ ch = ch - 0xa0 + rp->idx; \
+ gch = iso88597_from_ucs4[ch]; \
+ } \
+ \
+ if (gch != 0) \
+ { \
+ /* We use ISO 8859-7 greek. */ \
+ if (NEED_LENGTH_TEST \
+ && outptr + 6 > outend) \
+ { \
+ result = GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ *outptr++ = ESC; \
+ *outptr++ = '.'; \
+ *outptr++ = 'F'; \
+ set2 = ISO88597_set; \
+ *outptr++ = ESC; \
+ *outptr++ = 'N'; \
+ *outptr++ = gch; \
+ } \
+ else \
+ { \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
} \
} \
} \
@@ -745,8 +781,8 @@ gconv_end (struct gconv_step *data)
inptr += 4; \
}
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
-#define INIT_PARAMS int set = *setp
-#define UPDATE_PARAMS *setp = set
+#define INIT_PARAMS int set = *setp % 0x100, set2 = *setp / 0x100
+#define UPDATE_PARAMS *setp = (set2 << 8) + set
#include <iconv/loop.c>