diff options
Diffstat (limited to 'iconvdata')
47 files changed, 2345 insertions, 3853 deletions
diff --git a/iconvdata/8bit-gap.c b/iconvdata/8bit-gap.c index a8d3c99..4065a6d 100644 --- a/iconvdata/8bit-gap.c +++ b/iconvdata/8bit-gap.c @@ -19,10 +19,7 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> #include <stdint.h> -#include <string.h> - struct gap { @@ -34,192 +31,68 @@ struct gap /* Now we can include the tables. */ #include TABLES -/* We use three objects to describe the operation mode. */ -static int from_8bit_object; -static int to_8bit_object; - - -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, NAME) != NULL) - step->data = &from_8bit_object; - else if (strcasestr (step->to_name, NAME) != NULL) - step->data = &to_8bit_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. 
*/ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_8bit_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - wchar_t ch = to_ucs4[((unsigned char *) inbuf)[cnt]]; - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - const struct gap *rp = from_idx; - unsigned int ch = *((wchar_t *) (inbuf + cnt)); - char res; - - while (ch > rp->end) - ++rp; - if (ch < rp->start) - /* No valid character. */ - break; - - res = from_ucs4[ch + rp->idx]; - if (res == '\0' && ch != 0) - /* No valid character. */ - break; - - outbuf[outchars] = res; - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_8bit_object - ? 0 : sizeof (wchar_t) - 1) - ? 
GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - return result; -} +#define FROM_LOOP from_gap +#define TO_LOOP to_gap +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 + + +/* First define the conversion function from the 8bit charset to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = to_ucs4[*inptr]; \ + \ + if (HAS_HOLES && ch == L'\0' && *inptr != '\0') \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + ++inptr; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + const struct gap *rp = from_idx; \ + uint32_t ch = *((uint32_t *) inptr); \ + unsigned char res; \ + \ + while (ch > rp->end) \ + ++rp; \ + if (ch < rp->start) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + res = from_ucs4[ch + rp->idx]; \ + if (ch != 0 && res == '\0') \ + { \ + /* This is an illegal character. 
*/ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *outptr++ = res; \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/8bit-generic.c b/iconvdata/8bit-generic.c index 19194ad..2ea3331 100644 --- a/iconvdata/8bit-generic.c +++ b/iconvdata/8bit-generic.c @@ -18,186 +18,56 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> -#include <string.h> - -/* We use three objects to describe the operation mode. */ -static int from_8bit_object; -static int to_8bit_object; - - -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, NAME) != NULL) - step->data = &from_8bit_object; - else if (strcasestr (step->to_name, NAME) != NULL) - step->data = &to_8bit_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. 
*/ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_8bit_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - wchar_t ch = to_ucs4[((unsigned char *) inbuf)[cnt]]; - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - int ch = *((wchar_t *) (inbuf + cnt)); - - if (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0]) - || ch < 0 || (from_ucs4[ch] == '\0' && ch != 0)) - break; - - outbuf[outchars] = from_ucs4[ch]; - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_8bit_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. 
*/ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} +#define FROM_LOOP from_generic +#define TO_LOOP to_generic +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 + + +/* First define the conversion function from the 8bit charset to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = to_ucs4[*inptr]; \ + \ + if (HAS_HOLES && ch == L'\0' && *inptr != '\0') \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + ++inptr; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + \ + if (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0]) \ + || (ch != 0 && from_ucs4[ch] == '\0')) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *outptr++ = from_ucs4[ch]; \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. 
*/ +#include <iconv/skeleton.c> diff --git a/iconvdata/Makefile b/iconvdata/Makefile index dd1c391..6957685 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -26,8 +26,8 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \ ISO8859-6 ISO8859-7 ISO8859-8 ISO8859-9 ISO8859-10 \ T.61 ISO_6937 SJIS KOI-8 KOI8-R LATIN-GREEK LATIN-GREEK-1 \ HP-ROMAN8 EBCDIC-AT-DE EBCDIC-AT-DE-A EBCDIC-CA-FR \ - EUC-KR UHC JOHAB libJIS libKSC ISO646 BIG5 EUC-JP libGB \ - EUC-CN libCNS EUC-TW + EUC-KR UHC JOHAB libJIS libKSC BIG5 EUC-JP libGB \ + EUC-CN libCNS EUC-TW # ISO646 modules.so := $(addsuffix .so, $(modules)) @@ -211,7 +211,7 @@ endif include ../Rules .PHONY: do-iconv-test -tests: do-iconv-test +#tests: do-iconv-test do-iconv-test: run-iconv-test.sh $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) \ diff --git a/iconvdata/big5.c b/iconvdata/big5.c index a6a2580..2962712 100644 --- a/iconvdata/big5.c +++ b/iconvdata/big5.c @@ -8411,289 +8411,180 @@ static const char from_ucs4_tab13[][2] = }; -/* Direction of the transformation. */ -static int to_big5_object; -static int from_big5_object; +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "BIG5" +#define FROM_LOOP from_big5 +#define TO_LOOP to_big5 +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "BIG5") != NULL) - step->data = &from_big5_object; - else if (strcasestr (step->to_name, "BIG5") != NULL) - step->data = &to_big5_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. 
*/ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_big5_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = inbuf[cnt]; - wchar_t ch; - - if (inchar < '\xa1' || inchar > '\xfe') - ch = (wchar_t) inchar; - else - { - /* Two-byte character. First test whether the next - character is also available. */ - int inchar2; - int idx; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - idx = (inchar - 0xa1) * 157; - inchar2 = inbuf[++cnt]; - /* See whether the second byte is in the correct - range. */ - if (inchar2 >= '\x40' && inchar2 <= '\x7e') - idx += inchar2 - 0x40; - else if (inchar2 >= '\xa1' && inchar2 <= '\xfe') - idx += 0x3f + (inchar2 - 0xa1); - else - { - /* This is illegal. */ - --cnt; - result = GCONV_ILLEGAL_INPUT; - break; - } - - /* Get the value from the table. */ - ch = big5_to_ucs[idx]; - - if (ch == L'\0') - --cnt; - } - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. 
*/ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - int ch = *((wchar_t *) (inbuf + cnt)); - char buf[2]; - const char *cp; - - if (ch >= (sizeof (from_ucs4_tab1) - / sizeof (from_ucs4_tab1[0]))) - { - if (ch >= 0x2c7 && ch <= 0x2d9) - cp = from_ucs4_tab2[ch - 0x2c7]; - else if (ch >= 0x391 && ch <= 0x451) - cp = from_ucs4_tab3[ch - 0x391]; - else if (ch >= 0x2013 && ch <= 0x203e) - cp = from_ucs4_tab4[ch - 0x2013]; - else if (ch == 0x2103) - cp = "\xa2\x4a"; - else if (ch == 0x2105) - cp = "\xa1\xc1"; - else if (ch == 0x2109) - cp = "\xa2\x4b"; - else if (ch >= 0x2160 && ch <= 0x2169) - { - buf[0] = '\xa2'; - buf[1] = '\xb9' + (ch - 0x2160); - cp = buf; - } - else if (ch >= 0x2190 && ch <= 0x2199) - cp = from_ucs4_tab5[ch - 0x2190]; - else if (ch >= 0x221a && ch <= 0x22bf) - cp = from_ucs4_tab6[ch - 0x221a]; - else if (ch >= 0x2460 && ch <= 0x247d) - cp = from_ucs4_tab7[ch - 0x2460]; - else if (ch >= 0x2500 && ch <= 0x2642) - cp = from_ucs4_tab8[ch - 0x2500]; - else if (ch >= 0x3000 && ch <= 0x3129) - cp = from_ucs4_tab9[ch - 0x3000]; - else if (ch == 0x32a3) - cp = "\xa1\xc0"; - else if (ch >= 0x338e && ch <= 0x33d5) - cp = from_ucs4_tab10[ch - 0x338e]; - else if (ch >= 0x4e00 && ch <= 0x9fa4) - cp = from_ucs4_tab11[ch - 0x4e00]; - else if (ch == 0xfa0c) - cp = "\xc9\x4a"; - else if (ch == 0xfa0d) - cp = "\xdd\xfc"; - else if (ch >= 0xfe30 && ch <= 0xfe6b) - cp = from_ucs4_tab12[ch - 0xfe30]; - else if (ch >= 0xff01 && ch <= 0xff64) - cp = from_ucs4_tab13[ch - 0xff01]; - else - /* Illegal character. 
*/ - break; - } - else - cp = from_ucs4_tab1[ch]; - - if (cp[0] == '\0' && ch != 0) - /* Illegal character. */ - break; - - outbuf[outchars] = cp[0]; - /* Now test for a possible second byte and write this - if possible. */ - if (cp[1] != '\0') - { - if (outchars + 1 >= data->outbufsize) - { - /* The result does not fit into the buffer. */ - extra = 1; - break; - } - outbuf[++outchars] = cp[1]; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_big5_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; +/* First define the conversion function from Big5 to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + if (ch >= '\xa1' && ch <= '\xff') \ + { \ + /* Two-byte character. First test whether the next character \ + is also available. */ \ + uint32_t ch2; \ + int idx; \ + \ + if (NEED_LENGTH_TEST && inptr + 1 >= inend) \ + { \ + /* The second character is not available. 
*/ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + idx = (ch - 0xa1) * 157; \ + ch2 = inptr[1]; \ + /* See whether the second byte is in the correct range. */ \ + if (ch2 >= '\x40' && ch2 <= '\x7e') \ + idx += ch2 - 0x40; \ + else if (ch2 >= '\xa1' && ch2 <= '\xfe') \ + idx += 0x3f + (ch2 - 0xa1); \ + else \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* Get the value from the table. */ \ + ch = big5_to_ucs[idx]; \ + \ + /* Is this character defined? */ \ + if (ch == L'\0' && *inptr != '\0') \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 2; \ + } \ + else \ + ++inptr; \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + char buf[2]; \ + const char *cp; \ + \ + if (ch >= sizeof (from_ucs4_tab1) / sizeof (from_ucs4_tab1[0])) \ + switch (ch) \ + { \ + case 0x2c7 ... 0x2d9: \ + cp = from_ucs4_tab2[ch - 0x2c7]; \ + break; \ + case 0x391 ... 0x451: \ + cp = from_ucs4_tab3[ch - 0x391]; \ + break; \ + case 0x2013 ... 0x203e: \ + cp = from_ucs4_tab4[ch - 0x2013]; \ + break; \ + case 0x2103: \ + cp = "\xa2\x4a"; \ + break; \ + case 0x2105: \ + cp = "\xa1\xc1"; \ + break; \ + case 0x2109: \ + cp = "\xa2\x4b"; \ + break; \ + case 0x2160 ... 
0x2169: \ + { \ + buf[0] = '\xa2'; \ + buf[1] = '\xb9' + (ch - 0x2160); \ + cp = buf; \ + } \ + break; \ + case 0x2190 ... 0x2199: \ + cp = from_ucs4_tab5[ch - 0x2190]; \ + break; \ + case 0x221a ... 0x22bf: \ + cp = from_ucs4_tab6[ch - 0x221a]; \ + break; \ + case 0x2460 ... 0x247d: \ + cp = from_ucs4_tab7[ch - 0x2460]; \ + break; \ + case 0x2500 ... 0x2642: \ + cp = from_ucs4_tab8[ch - 0x2500]; \ + break; \ + case 0x3000 ... 0x3129: \ + cp = from_ucs4_tab9[ch - 0x3000]; \ + break; \ + case 0x32a3: \ + cp = "\xa1\xc0"; \ + break; \ + case 0x338e ... 0x33d5: \ + cp = from_ucs4_tab10[ch - 0x338e]; \ + break; \ + case 0x4e00 ... 0x9fa4: \ + cp = from_ucs4_tab11[ch - 0x4e00]; \ + break; \ + case 0xfa0c: \ + cp = "\xc9\x4a"; \ + break; \ + case 0xfa0d: \ + cp = "\xdd\xfc"; \ + break; \ + case 0xfe30 ... 0xfe6b: \ + cp = from_ucs4_tab12[ch - 0xfe30]; \ + break; \ + case 0xff01 ... 0xff64: \ + cp = from_ucs4_tab13[ch - 0xff01]; \ + break; \ + default: \ + /* Illegal character. */ \ + cp = ""; \ + break; \ + } \ + else \ + cp = from_ucs4_tab1[ch]; \ + \ + if (cp[0] == '\0' && ch != 0) \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* See whether there is enough room for the second byte we write. */ \ + if (NEED_LENGTH_TEST && cp[1] != '\0' && outptr + 1 >= outend) \ + { \ + /* We have not enough room. */ \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ + *outptr++ = cp[0]; \ + if (cp[1] != '\0') \ + *outptr++ = cp[1]; \ + inptr += 4; \ + } +#include <iconv/loop.c> - if (written != NULL && data->is_last) - *written = do_write; - return result; -} +/* Now define the toplevel functions. 
*/ +#include <iconv/skeleton.c> diff --git a/iconvdata/cns11643.c b/iconvdata/cns11643.c index 9035482..bd1994f 100644 --- a/iconvdata/cns11643.c +++ b/iconvdata/cns11643.c @@ -44,7 +44,7 @@ printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const uint16_t cns11643l2_to_ucs4_tab[] = +const uint16_t __cns11643l2_to_ucs4_tab[] = { [0x0000] = 0x4e42, [0x0001] = 0x4e5c, [0x0002] = 0x51f5, [0x0003] = 0x531a, [0x0004] = 0x5382, [0x0005] = 0x4e07, [0x0006] = 0x4e0c, [0x0007] = 0x4e47, @@ -1985,7 +1985,7 @@ const uint16_t cns11643l2_to_ucs4_tab[] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const uint16_t cns11643l14_to_ucs4_tab[] = +const uint16_t __cns11643l14_to_ucs4_tab[] = { [0x0000] = 0x4e28, [0x0001] = 0x4e36, [0x0002] = 0x4e3f, [0x0003] = 0x4e85, [0x0004] = 0x4e05, [0x0005] = 0x4e04, [0x0006] = 0x5182, [0x0007] = 0x5196, @@ -3064,7 +3064,7 @@ const uint16_t cns11643l14_to_ucs4_tab[] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643_from_ucs4_tab[][3] = +const char __cns11643_from_ucs4_tab[][3] = { [0x0000] = "\x01\x44\x21", [0x0001] = "\x01\x44\x23", [0x0003] = "\x01\x44\x24", [0x0004] = "\x0e\x21\x26", diff --git a/iconvdata/cns11643.h b/iconvdata/cns11643.h index f791d4c..ccab801 100644 --- a/iconvdata/cns11643.h +++ b/iconvdata/cns11643.h @@ -21,8 +21,8 @@ #include <stdint.h> /* Table for CNS 11643, plane 2 to UCS4 conversion. 
*/ -extern const uint16_t cns11643l2_to_ucs4_tab[]; -extern const uint16_t cns11643l14_to_ucs4_tab[]; +extern const uint16_t __cns11643l2_to_ucs4_tab[]; +extern const uint16_t __cns11643l14_to_ucs4_tab[]; static inline wchar_t @@ -54,19 +54,19 @@ cns11643_to_ucs4 (const char **s, size_t avail, unsigned char offset) { if (idx > 0x2196) return UNKNOWN_10646_CHAR; - result = cns11643l1_to_ucs4_tab[idx]; + result = __cns11643l1_to_ucs4_tab[idx]; } else if ((ch - 0x21 - offset) == 2) { if (idx > 0x1de1) return UNKNOWN_10646_CHAR; - result = cns11643l2_to_ucs4_tab[idx]; + result = __cns11643l2_to_ucs4_tab[idx]; } else if ((ch - 0x21 - offset) == 0xe) { if (idx > 0x19bd) return UNKNOWN_10646_CHAR; - result = cns11643l14_to_ucs4_tab[idx]; + result = __cns11643l14_to_ucs4_tab[idx]; } else return UNKNOWN_10646_CHAR; @@ -81,21 +81,21 @@ cns11643_to_ucs4 (const char **s, size_t avail, unsigned char offset) /* Tables for the UCS4 -> CNS conversion. */ -extern const char cns11643l1_from_ucs4_tab1[][2]; -extern const char cns11643l1_from_ucs4_tab2[][2]; -extern const char cns11643l1_from_ucs4_tab3[][2]; -extern const char cns11643l1_from_ucs4_tab4[][2]; -extern const char cns11643l1_from_ucs4_tab5[][2]; -extern const char cns11643l1_from_ucs4_tab6[][2]; -extern const char cns11643l1_from_ucs4_tab7[][2]; -extern const char cns11643l1_from_ucs4_tab8[][2]; -extern const char cns11643l1_from_ucs4_tab9[][2]; -extern const char cns11643l1_from_ucs4_tab10[][2]; -extern const char cns11643l1_from_ucs4_tab11[][2]; -extern const char cns11643l1_from_ucs4_tab12[][2]; -extern const char cns11643l1_from_ucs4_tab13[][2]; -extern const char cns11643l1_from_ucs4_tab14[][2]; -extern const char cns11643_from_ucs4_tab[][3]; +extern const char __cns11643l1_from_ucs4_tab1[][2]; +extern const char __cns11643l1_from_ucs4_tab2[][2]; +extern const char __cns11643l1_from_ucs4_tab3[][2]; +extern const char __cns11643l1_from_ucs4_tab4[][2]; +extern const char __cns11643l1_from_ucs4_tab5[][2]; +extern const 
char __cns11643l1_from_ucs4_tab6[][2]; +extern const char __cns11643l1_from_ucs4_tab7[][2]; +extern const char __cns11643l1_from_ucs4_tab8[][2]; +extern const char __cns11643l1_from_ucs4_tab9[][2]; +extern const char __cns11643l1_from_ucs4_tab10[][2]; +extern const char __cns11643l1_from_ucs4_tab11[][2]; +extern const char __cns11643l1_from_ucs4_tab12[][2]; +extern const char __cns11643l1_from_ucs4_tab13[][2]; +extern const char __cns11643l1_from_ucs4_tab14[][2]; +extern const char __cns11643_from_ucs4_tab[][3]; static inline size_t @@ -103,120 +103,104 @@ ucs4_to_cns11643 (wchar_t wch, char *s, size_t avail) { unsigned int ch = (unsigned int) wch; char buf[2]; - const char *cp = NULL; + const char *cp = buf; int needed = 2; - if (ch < 0xa7) - cp = ""; - else if (ch < 0xf7) - cp = cns11643l1_from_ucs4_tab1[ch - 0xa7]; - else if (ch < 0x2c7) - cp = ""; - else if (ch <= 0x2d9) - cp = cns11643l1_from_ucs4_tab2[ch - 0x2c7]; - else if (ch < 0x391) - cp = ""; - else if (ch <= 0x3c9) - cp = cns11643l1_from_ucs4_tab3[ch - 0x391]; - else if (ch < 0x2013) - cp = ""; - else if (ch <= 0x203e) - cp = cns11643l1_from_ucs4_tab4[ch - 0x2013]; - else if (ch == 0x2103) - cp = "\x22\x6a"; - else if (ch == 0x2105) - cp = "\x22\x22"; - else if (ch == 0x2109) - cp = "\x22\x6b"; - else if (ch < 0x2160) - cp = ""; - else if (ch <= 0x2169) + switch (ch) { + case 0xa7 ... 0xf7: + cp = __cns11643l1_from_ucs4_tab1[ch - 0xa7]; + break; + case 0x2c7 ... 0x2d9: + cp = __cns11643l1_from_ucs4_tab2[ch - 0x2c7]; + break; + case 0x391 ... 0x3c9: + cp = __cns11643l1_from_ucs4_tab3[ch - 0x391]; + break; + case 0x2013 ... 0x203e: + cp = __cns11643l1_from_ucs4_tab4[ch - 0x2013]; + break; + case 0x2103: + cp = "\x22\x6a"; + break; + case 0x2105: + cp = "\x22\x22"; + break; + case 0x2109: + cp = "\x22\x6b"; + break; + case 0x2160 ...0x2169: buf[0] = '\x24'; buf[1] = '\x2b' + (ch - 0x2160); - cp = buf; - } - else if (ch < 0x2170) - cp = ""; - else if (ch <= 0x2179) - { + break; + case 0x2170 ... 
0x2179: buf[0] = '\x26'; buf[1] = '\x35' + (ch - 0x2170); - cp = buf; - } - else if (ch < 0x2190) - cp = ""; - else if (ch <= 0x2199) - cp = cns11643l1_from_ucs4_tab5[ch - 0x2190]; - else if (ch < 0x2215) - cp = ""; - else if (ch <= 0x2267) - cp = cns11643l1_from_ucs4_tab6[ch - 0x2215]; - else if (ch == 0x22a5) - cp = "\x22\x47"; - else if (ch == 0x22bf) - cp = "\x22\x4a"; - else if (ch < 0x2400) - cp = ""; - else if (ch <= 0x2421) - cp = cns11643l1_from_ucs4_tab7[ch - 0x2400]; - else if (ch < 0x2460) - cp = ""; - else if (ch <= 0x247d) - cp = cns11643l1_from_ucs4_tab8[ch - 0x2460]; - else if (ch < 0x2500) - cp = ""; - else if (ch <= 0x2642) - cp = cns11643l1_from_ucs4_tab9[ch - 0x2500]; - else if (ch < 0x3000) - cp = ""; - else if (ch <= 0x3029) - cp = cns11643l1_from_ucs4_tab10[ch - 0x3000]; - else if (ch == 0x30fb) - cp = "\x21\x26"; - else if (ch < 0x3105) - cp = ""; - else if (ch <= 0x3129) - { + break; + case 0x2190 ... 0x2199: + cp = __cns11643l1_from_ucs4_tab5[ch - 0x2190]; + break; + case 0x2215 ... 0x2267: + cp = __cns11643l1_from_ucs4_tab6[ch - 0x2215]; + break; + case 0x22a5: + cp = "\x22\x47"; + break; + case 0x22bf: + cp = "\x22\x4a"; + break; + case 0x2400 ... 0x2421: + cp = __cns11643l1_from_ucs4_tab7[ch - 0x2400]; + break; + case 0x2460 ... 0x247d: + cp = __cns11643l1_from_ucs4_tab8[ch - 0x2460]; + break; + case 0x2500 ... 0x2642: + cp = __cns11643l1_from_ucs4_tab9[ch - 0x2500]; + case 0x3000 ... 0x3029: + cp = __cns11643l1_from_ucs4_tab10[ch - 0x3000]; + break; + case 0x30fb: + cp = "\x21\x26"; + break; + case 0x3105 ... 0x3129: buf[0] = '\x25'; buf[1] = '\x26' + (ch - 0x3105); - cp = buf; - } - else if (ch == 0x32a3) - cp = "\x22\x21"; - else if (ch < 0x338e) - cp = ""; - else if (ch <= 0x33d5) - cp = cns11643l1_from_ucs4_tab11[ch - 0x338e]; - else if (ch < 0x4e00) - cp = ""; - else if (ch <= 0x9f9c) - { - cp = cns11643l1_from_ucs4_tab12[ch - 0x4e00]; + break; + case 0x32a3: + cp = "\x22\x21"; + break; + case 0x338e ... 
0x33d5: + cp = __cns11643l1_from_ucs4_tab11[ch - 0x338e]; + break; + case 0x4e00 ... 0x9f9c: + cp = __cns11643l1_from_ucs4_tab12[ch - 0x4e00]; if (cp[0] == '\0') { /* Let's try the other planes. */ needed = 3; - cp = cns11643_from_ucs4_tab[ch - 0x4e00]; + cp = __cns11643_from_ucs4_tab[ch - 0x4e00]; } + break; + case 0xfe30 ... 0xfe6b: + cp = __cns11643l1_from_ucs4_tab13[ch - 0xfe30]; + break; + case 0xff01 ... 0xff5d: + cp = __cns11643l1_from_ucs4_tab14[ch - 0xff01]; + break; + case 0xffe0: + cp = "\x22\x66"; + break; + case 0xffe1: + cp = "\x22\x67"; + break; + case 0xffe5: + cp = "\x22\x64"; + break; + default: + cp = ""; } - else if (ch < 0xfe30) - cp = ""; - else if (ch <= 0xfe6b) - cp = cns11643l1_from_ucs4_tab13[ch - 0xfe30]; - else if (ch < 0xff01) - cp = ""; - else if (ch <= 0xff5d) - cp = cns11643l1_from_ucs4_tab14[ch - 0xff01]; - else if (ch == 0xffe0) - cp = "\x22\x66"; - else if (ch == 0xffe1) - cp = "\x22\x67"; - else if (ch == 0xffe5) - cp = "\x22\x64"; - else - cp = ""; if (cp[0] == '\0') return UNKNOWN_10646_CHAR; diff --git a/iconvdata/cns11643l1.c b/iconvdata/cns11643l1.c index 730fb55..d106b3d 100644 --- a/iconvdata/cns11643l1.c +++ b/iconvdata/cns11643l1.c @@ -45,7 +45,7 @@ printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const uint16_t cns11643l1_to_ucs4_tab[] = +const uint16_t __cns11643l1_to_ucs4_tab[] = { [0x0000] = 0x3000, [0x0001] = 0xff0c, [0x0002] = 0x3001, [0x0003] = 0x3002, [0x0004] = 0xff0e, [0x0005] = 0x30fb, [0x0006] = 0xff1b, [0x0007] = 0xff1a, @@ -1517,7 +1517,7 @@ const uint16_t cns11643l1_to_ucs4_tab[] = /* Some Latin1 characters, starting at U+00a7. */ -const char cns11643l1_from_ucs4_tab1[][2] = +const char __cns11643l1_from_ucs4_tab1[][2] = { [0x00] = "\x21\x70", [0x09] = "\x22\x78", [0x0a] = "\x22\x34", [0x10] = "\x21\x31", [0x30] = "\x22\x32", [0x50] = "\x22\x33" @@ -1525,7 +1525,7 @@ const char cns11643l1_from_ucs4_tab1[][2] = /* Some phonetic modifiers, starting at U+02c7. 
*/ -const char cns11643l1_from_ucs4_tab2[][2] = +const char __cns11643l1_from_ucs4_tab2[][2] = { [0x00] = "\x25\x6f", [0x02] = "\x25\x6d", [0x03] = "\x25\x6e", [0x04] = "\x25\x70", [0x12] = "\x25\x6c" @@ -1552,7 +1552,7 @@ const char cns11643l1_from_ucs4_tab2[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab3[][2] = +const char __cns11643l1_from_ucs4_tab3[][2] = { [0x0000] = "\x24\x75", [0x0001] = "\x24\x76", [0x0002] = "\x24\x77", [0x0003] = "\x24\x78", [0x0004] = "\x24\x79", [0x0005] = "\x24\x7a", @@ -1593,7 +1593,7 @@ const char cns11643l1_from_ucs4_tab3[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab4[][2] = +const char __cns11643l1_from_ucs4_tab4[][2] = { [0x0000] = "\x21\x39", [0x0001] = "\x21\x37", [0x0003] = "\x22\x5d", [0x0005] = "\x21\x64", [0x0006] = "\x21\x65", [0x0009] = "\x21\x66", @@ -1603,7 +1603,7 @@ const char cns11643l1_from_ucs4_tab4[][2] = }; -const char cns11643l1_from_ucs4_tab5[][2] = +const char __cns11643l1_from_ucs4_tab5[][2] = { [0x00] = "\x22\x58", [0x01] = "\x22\x55", [0x02] = "\x22\x57", [0x03] = "\x22\x56", [0x06] = "\x22\x59", [0x07] = "\x22\x5a", @@ -1631,7 +1631,7 @@ const char cns11643l1_from_ucs4_tab5[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab6[][2] = +const char __cns11643l1_from_ucs4_tab6[][2] = { [0x0000] = "\x22\x61", [0x0005] = "\x22\x35", [0x0009] = "\x22\x3c", [0x000a] = "\x22\x49", [0x000b] = "\x22\x48", [0x0014] = "\x22\x45", @@ -1661,7 +1661,7 @@ const char cns11643l1_from_ucs4_tab6[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab7[][2] = +const char __cns11643l1_from_ucs4_tab7[][2] = { [0x0000] = "\x42\x21", [0x0001] = "\x42\x22", [0x0002] = "\x42\x23", [0x0003] = "\x42\x24", 
[0x0004] = "\x42\x25", [0x0005] = "\x42\x26", @@ -1697,7 +1697,7 @@ const char cns11643l1_from_ucs4_tab7[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab8[][2] = +const char __cns11643l1_from_ucs4_tab8[][2] = { [0x0000] = "\x26\x21", [0x0001] = "\x26\x22", [0x0002] = "\x26\x23", [0x0003] = "\x26\x24", [0x0004] = "\x26\x25", [0x0005] = "\x26\x26", @@ -1729,7 +1729,7 @@ const char cns11643l1_from_ucs4_tab8[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab9[][2] = +const char __cns11643l1_from_ucs4_tab9[][2] = { [0x0000] = "\x23\x39", [0x0002] = "\x23\x3a", [0x000c] = "\x23\x3c", [0x0010] = "\x23\x3d", [0x0014] = "\x23\x3e", [0x0018] = "\x23\x3f", @@ -1774,7 +1774,7 @@ const char cns11643l1_from_ucs4_tab9[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab10[][2] = +const char __cns11643l1_from_ucs4_tab10[][2] = { [0x0000] = "\x21\x21", [0x0001] = "\x21\x23", [0x0002] = "\x21\x24", [0x0003] = "\x21\x71", [0x0008] = "\x21\x52", [0x0009] = "\x21\x53", @@ -1809,7 +1809,7 @@ const char cns11643l1_from_ucs4_tab10[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab11[][2] = +const char __cns11643l1_from_ucs4_tab11[][2] = { [0x0000] = "\x22\x75", [0x0001] = "\x22\x76", [0x000e] = "\x22\x70", [0x000f] = "\x22\x71", [0x0010] = "\x22\x72", [0x0013] = "\x22\x74", @@ -1838,7 +1838,7 @@ const char cns11643l1_from_ucs4_tab11[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab12[][2] = +const char __cns11643l1_from_ucs4_tab12[][2] = { [0x0000] = "\x44\x21", [0x0001] = "\x44\x23", [0x0003] = "\x44\x24", [0x0008] = "\x44\x37", [0x0009] = "\x44\x35", [0x000a] = "\x44\x38", @@ 
-3667,7 +3667,7 @@ const char cns11643l1_from_ucs4_tab12[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab13[][2] = +const char __cns11643l1_from_ucs4_tab13[][2] = { [0x0000] = "\x21\x2b", [0x0001] = "\x21\x36", [0x0002] = "\x21\x38", [0x0005] = "\x21\x40", [0x0006] = "\x21\x41", [0x0007] = "\x21\x44", @@ -3709,7 +3709,7 @@ const char cns11643l1_from_ucs4_tab13[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char cns11643l1_from_ucs4_tab14[][2] = +const char __cns11643l1_from_ucs4_tab14[][2] = { [0x0000] = "\x21\x2a", [0x0002] = "\x21\x6c", [0x0003] = "\x22\x63", [0x0004] = "\x22\x68", [0x0005] = "\x21\x6d", [0x0007] = "\x21\x3e", diff --git a/iconvdata/cns11643l1.h b/iconvdata/cns11643l1.h index 4f9d085..aa78c26 100644 --- a/iconvdata/cns11643l1.h +++ b/iconvdata/cns11643l1.h @@ -19,9 +19,10 @@ Boston, MA 02111-1307, USA. */ #include <stdint.h> +#include <gconv.h> /* Table for CNS 11643, plane 1 to UCS4 conversion. */ -extern const uint16_t cns11643l1_to_ucs4_tab[]; +extern const uint16_t __cns11643l1_to_ucs4_tab[]; static inline wchar_t @@ -47,25 +48,25 @@ cns11643l1_to_ucs4 (const char **s, size_t avail, unsigned char offset) (*s) += 2; - return cns11643l1_to_ucs4_tab[idx] ?: ((*s) -= 2, UNKNOWN_10646_CHAR); + return __cns11643l1_to_ucs4_tab[idx] ?: ((*s) -= 2, UNKNOWN_10646_CHAR); } /* Tables for the UCS4 -> CNS conversion. 
*/ -extern const char cns11643l1_from_ucs4_tab1[][2]; -extern const char cns11643l1_from_ucs4_tab2[][2]; -extern const char cns11643l1_from_ucs4_tab3[][2]; -extern const char cns11643l1_from_ucs4_tab4[][2]; -extern const char cns11643l1_from_ucs4_tab5[][2]; -extern const char cns11643l1_from_ucs4_tab6[][2]; -extern const char cns11643l1_from_ucs4_tab7[][2]; -extern const char cns11643l1_from_ucs4_tab8[][2]; -extern const char cns11643l1_from_ucs4_tab9[][2]; -extern const char cns11643l1_from_ucs4_tab10[][2]; -extern const char cns11643l1_from_ucs4_tab11[][2]; -extern const char cns11643l1_from_ucs4_tab12[][2]; -extern const char cns11643l1_from_ucs4_tab13[][2]; -extern const char cns11643l1_from_ucs4_tab14[][2]; +extern const char __cns11643l1_from_ucs4_tab1[][2]; +extern const char __cns11643l1_from_ucs4_tab2[][2]; +extern const char __cns11643l1_from_ucs4_tab3[][2]; +extern const char __cns11643l1_from_ucs4_tab4[][2]; +extern const char __cns11643l1_from_ucs4_tab5[][2]; +extern const char __cns11643l1_from_ucs4_tab6[][2]; +extern const char __cns11643l1_from_ucs4_tab7[][2]; +extern const char __cns11643l1_from_ucs4_tab8[][2]; +extern const char __cns11643l1_from_ucs4_tab9[][2]; +extern const char __cns11643l1_from_ucs4_tab10[][2]; +extern const char __cns11643l1_from_ucs4_tab11[][2]; +extern const char __cns11643l1_from_ucs4_tab12[][2]; +extern const char __cns11643l1_from_ucs4_tab13[][2]; +extern const char __cns11643l1_from_ucs4_tab14[][2]; static inline size_t @@ -73,110 +74,97 @@ ucs4_to_cns11643l1 (wchar_t wch, char *s, size_t avail) { unsigned int ch = (unsigned int) wch; char buf[2]; - const char *cp = NULL; - - if (ch < 0xa7) - cp = ""; - else if (ch < 0xf7) - cp = cns11643l1_from_ucs4_tab1[ch - 0xa7]; - else if (ch < 0x2c7) - cp = ""; - else if (ch <= 0x2d9) - cp = cns11643l1_from_ucs4_tab2[ch - 0x2c7]; - else if (ch < 0x391) - cp = ""; - else if (ch <= 0x3c9) - cp = cns11643l1_from_ucs4_tab3[ch - 0x391]; - else if (ch < 0x2013) - cp = ""; - else if (ch
<= 0x203e) - cp = cns11643l1_from_ucs4_tab4[ch - 0x2013]; - else if (ch == 0x2103) - cp = "\x22\x6a"; - else if (ch == 0x2105) - cp = "\x22\x22"; - else if (ch == 0x2109) - cp = "\x22\x6b"; - else if (ch < 0x2160) - cp = ""; - else if (ch <= 0x2169) + const char *cp = buf; + + switch (ch) { + case 0xa7 ... 0xf7: + cp = __cns11643l1_from_ucs4_tab1[ch - 0xa7]; + break; + case 0x2c7 ... 0x2d9: + cp = __cns11643l1_from_ucs4_tab2[ch - 0x2c7]; + break; + case 0x391 ... 0x3c9: + cp = __cns11643l1_from_ucs4_tab3[ch - 0x391]; + break; + case 0x2013 ... 0x203e: + cp = __cns11643l1_from_ucs4_tab4[ch - 0x2013]; + break; + case 0x2103: + cp = "\x22\x6a"; + break; + case 0x2105: + cp = "\x22\x22"; + break; + case 0x2109: + cp = "\x22\x6b"; + break; + case 0x2160 ... 0x2169: buf[0] = '\x24'; buf[1] = '\x2b' + (ch - 0x2160); - cp = buf; - } - else if (ch < 0x2170) - cp = ""; - else if (ch <= 0x2179) - { + break; + case 0x2170 ... 0x2179: buf[0] = '\x26'; buf[1] = '\x35' + (ch - 0x2170); - cp = buf; - } - else if (ch < 0x2190) - cp = ""; - else if (ch <= 0x2199) - cp = cns11643l1_from_ucs4_tab5[ch - 0x2190]; - else if (ch < 0x2215) - cp = ""; - else if (ch <= 0x2267) - cp = cns11643l1_from_ucs4_tab6[ch - 0x2215]; - else if (ch == 0x22a5) - cp = "\x22\x47"; - else if (ch == 0x22bf) - cp = "\x22\x4a"; - else if (ch < 0x2400) - cp = ""; - else if (ch <= 0x2421) - cp = cns11643l1_from_ucs4_tab7[ch - 0x2400]; - else if (ch < 0x2460) - cp = ""; - else if (ch <= 0x247d) - cp = cns11643l1_from_ucs4_tab8[ch - 0x2460]; - else if (ch < 0x2500) - cp = ""; - else if (ch <= 0x2642) - cp = cns11643l1_from_ucs4_tab9[ch - 0x2500]; - else if (ch < 0x3000) - cp = ""; - else if (ch <= 0x3029) - cp = cns11643l1_from_ucs4_tab10[ch - 0x3000]; - else if (ch == 0x30fb) - cp = "\x21\x26"; - else if (ch < 0x3105) - cp = ""; - else if (ch <= 0x3129) - { + break; + case 0x2190 ... 0x2199: + cp = __cns11643l1_from_ucs4_tab5[ch - 0x2190]; + break; + case 0x2215 ...
0x2267: + cp = __cns11643l1_from_ucs4_tab6[ch - 0x2215]; + break; + case 0x22a5: + cp = "\x22\x47"; + break; + case 0x22bf: + cp = "\x22\x4a"; + break; + case 0x2400 ... 0x2421: + cp = __cns11643l1_from_ucs4_tab7[ch - 0x2400]; + break; + case 0x2460 ... 0x247d: + cp = __cns11643l1_from_ucs4_tab8[ch - 0x2460]; + break; + case 0x2500 ... 0x2642: + cp = __cns11643l1_from_ucs4_tab9[ch - 0x2500]; + break; + case 0x3000 ... 0x3029: + cp = __cns11643l1_from_ucs4_tab10[ch - 0x3000]; + break; + case 0x30fb: + cp = "\x21\x26"; + break; + case 0x3105 ... 0x3129: buf[0] = '\x25'; buf[1] = '\x26' + (ch - 0x3105); - cp = buf; + break; + case 0x32a3: + cp = "\x22\x21"; + break; + case 0x338e ... 0x33d5: + cp = __cns11643l1_from_ucs4_tab11[ch - 0x338e]; + break; + case 0x4e00 ... 0x9f9c: + cp = __cns11643l1_from_ucs4_tab12[ch - 0x4e00]; + break; + case 0xfe30 ... 0xfe6b: + cp = __cns11643l1_from_ucs4_tab13[ch - 0xfe30]; + break; + case 0xff01 ... 0xff5d: + cp = __cns11643l1_from_ucs4_tab14[ch - 0xff01]; + break; + case 0xffe0: + cp = "\x22\x66"; + break; + case 0xffe1: + cp = "\x22\x67"; + break; + case 0xffe5: + cp = "\x22\x64"; + break; + default: + buf[0] = '\0'; } - else if (ch == 0x32a3) - cp = "\x22\x21"; - else if (ch < 0x338e) - cp = ""; - else if (ch <= 0x33d5) - cp = cns11643l1_from_ucs4_tab11[ch - 0x338e]; - else if (ch < 0x4e00) - cp = ""; - else if (ch <= 0x9f9c) - cp = cns11643l1_from_ucs4_tab12[ch - 0x4e00]; - else if (ch < 0xfe30) - cp = ""; - else if (ch <= 0xfe6b) - cp = cns11643l1_from_ucs4_tab13[ch - 0xfe30]; - else if (ch < 0xff01) - cp = ""; - else if (ch <= 0xff5d) - cp = cns11643l1_from_ucs4_tab14[ch - 0xff01]; - else if (ch == 0xffe0) - cp = "\x22\x66"; - else if (ch == 0xffe1) - cp = "\x22\x67"; - else if (ch == 0xffe5) - cp = "\x22\x64"; - else - cp = ""; if (cp[0] == '\0') return UNKNOWN_10646_CHAR; diff --git a/iconvdata/ebcdic-at-de-a.c b/iconvdata/ebcdic-at-de-a.c index 7251490..654bdf8 100644 --- a/iconvdata/ebcdic-at-de-a.c +++ 
b/iconvdata/ebcdic-at-de-a.c @@ -1,5 +1,5 @@ /* Conversion from and to EBCDIC-AT-DE-A. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,12 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +#include <stdint.h> + +/* Get the conversion table. */ #include <ebcdic-at-de-a.h> -#define NAME "EBCDIC-AT-DE-A" + +#define CHARSET_NAME "EBCDIC-AT-DE-A" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/ebcdic-at-de.c b/iconvdata/ebcdic-at-de.c index d9168fc..ab71885 100644 --- a/iconvdata/ebcdic-at-de.c +++ b/iconvdata/ebcdic-at-de.c @@ -1,5 +1,5 @@ /* Conversion from and to EBCDIC-AT-DE. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,12 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +#include <stdint.h> + +/* Get the conversion table. */ #include <ebcdic-at-de.h> -#define NAME "EBCDIC-AT-DE" + +#define CHARSET_NAME "EBCDIC-AT-DE" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/ebcdic-ca-fr.c b/iconvdata/ebcdic-ca-fr.c index a42914e..91cf5aa 100644 --- a/iconvdata/ebcdic-ca-fr.c +++ b/iconvdata/ebcdic-ca-fr.c @@ -1,5 +1,5 @@ /* Conversion from and to EBCDIC-CA-FR. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
@@ -18,7 +18,12 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +#include <stdint.h> + +/* Get the conversion table. */ #include <ebcdic-ca-fr.h> -#define NAME "EBCDIC-CA-FR" + +#define CHARSET_NAME "EBCDIC-CA-FR" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/euccn.c b/iconvdata/euccn.c index f683836f..90e82cb 100644 --- a/iconvdata/euccn.c +++ b/iconvdata/euccn.c @@ -18,262 +18,124 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> -#include <stdint.h> -#include <string.h> -#include <wchar.h> #include <gb2312.h> +#include <stdint.h> -/* Direction of the transformation. */ -static int to_euccn_object; -static int from_euccn_object; - - -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "EUC-CN") != NULL) - step->data = &from_euccn_object; - else if (strcasestr (step->to_name, "EUC-CN") != NULL) - step->data = &to_euccn_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. 
*/ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_euccn_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = (unsigned char) inbuf[cnt]; - wchar_t ch; - - if (inchar <= 0x7f) - ch = (wchar_t) inchar; - else if ((inchar <= 0xa0 || inchar > 0xfe) - && inchar != 0x8e && inchar != 0x8f) - /* This is illegal. */ - ch = L'\0'; - else - { - /* Two or more byte character. First test whether the - next character is also available. */ - const char *endp; - int inchar2; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = (unsigned char) inbuf[++cnt]; - - /* All second bytes of a multibyte character must be - >= 0xa1. */ - if (inchar2 < 0xa1) - { - /* This is an illegal character. */ - --cnt; - result = GCONV_ILLEGAL_INPUT; - break; - } - - /* This is code set 1: GB 2312-80. */ - endp = &inbuf[cnt - 1]; - - ch = gb2312_to_ucs4 (&endp, 2, 0x80); - if (ch != L'\0') - ++cnt; - - if (ch == UNKNOWN_10646_CHAR) - ch = L'\0'; - - if (ch == L'\0') - --cnt; - } - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. 
*/ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - wchar_t ch = *((wchar_t *) (inbuf + cnt)); - - if (ch <= L'\x7f') - /* It's plain ASCII. */ - outbuf[outchars] = ch; - else - { - /* Try the JIS character sets. */ - size_t found; - - found = ucs4_to_gb2312 (ch, &outbuf[outchars], - (data->outbufsize - - outchars)); - if (found > 0) - { - /* It's a GB 2312 character, adjust it for - EUC-CN. */ - outbuf[outchars++] += 0x80; - outbuf[outchars] += 0x80; - } - else if (found == 0) - { - /* We ran out of space. */ - extra = 2; - break; - } - else - /* Illegal character. */ - break; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_euccn_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. 
*/ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "EUC-CN" +#define FROM_LOOP from_euc_cn +#define TO_LOOP to_euc_cn +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 + + +/* First define the conversion function from EUC-CN to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + if (ch <= 0x7f) \ + ++inptr; \ + else \ + if ((ch <= 0xa0 || ch > 0xfe) && ch != 0x8e && ch != 0x8f) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + /* Two or more byte character. First test whether the \ + next character is also available. */ \ + const char *endp; \ + \ + if (NEED_LENGTH_TEST && inptr + 1 >= inend) \ + { \ + /* The second character is not available. Store \ + the intermediate result. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch = inptr[1]; \ + \ + /* All second bytes of a multibyte character must be >= 0xa1. */ \ + if (ch < 0xa1) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* This is code set 1: GB 2312-80. */ \ + endp = inptr; \ + \ + ch = gb2312_to_ucs4 (&endp, 2, 0x80); \ + if (ch == UNKNOWN_10646_CHAR) \ + { \ + /* This is an illegal character.
*/ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 2; \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + \ + if (ch <= L'\x7f') \ + /* It's plain ASCII. */ \ + *outptr++ = (unsigned char) ch; \ + else \ + { \ + size_t found; \ + \ + found = ucs4_to_gb2312 (ch, outptr, \ + (NEED_LENGTH_TEST \ + ? outend - outptr : MAX_NEEDED_OUTPUT)); \ + if (!NEED_LENGTH_TEST || found != 0) \ + { \ + if (found == UNKNOWN_10646_CHAR) \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* It's a GB 2312 character, adjust it for EUC-CN. */ \ + *outptr++ += 0x80; \ + *outptr++ += 0x80; \ + } \ + else \ + { \ + /* We ran out of space. */ \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + } \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/eucjp.c b/iconvdata/eucjp.c index e6a71cc..24ebed9 100644 --- a/iconvdata/eucjp.c +++ b/iconvdata/eucjp.c @@ -18,306 +18,190 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> #include <stdint.h> -#include <string.h> -#include <wchar.h> +#include <gconv.h> #include <jis0201.h> #include <jis0208.h> #include <jis0212.h> -/* Direction of the transformation. */ -static int to_eucjp_object; -static int from_eucjp_object; - - -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. 
*/ - if (strcasestr (step->from_name, "EUC-JP") != NULL) - step->data = &from_eucjp_object; - else if (strcasestr (step->to_name, "EUC-JP") != NULL) - step->data = &to_eucjp_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_eucjp_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = (unsigned char) inbuf[cnt]; - wchar_t ch; - - if (inchar <= 0x7f) - ch = (wchar_t) inchar; - else if ((inchar <= 0xa0 || inchar > 0xfe) - && inchar != 0x8e && inchar != 0x8f) - /* This is illegal. */ - ch = L'\0'; - else - { - /* Two or more byte character. First test whether the - next character is also available. */ - int inchar2; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. 
*/ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = (unsigned char) inbuf[++cnt]; - - /* All second bytes of a multibyte character must be - >= 0xa1. */ - if (inchar2 < 0xa1) - { - /* This is an illegal character. */ - --cnt; - result = GCONV_ILLEGAL_INPUT; - break; - } - - if (inchar == '\x8e') - /* This is code set 2: half-width katakana. */ - ch = jisx0201_to_ucs4 (inchar2); - else if (inchar == '\x8f') - { - /* This is code set 3: JIS X 0212-1990. */ - const char *endp = &inbuf[cnt]; - - ch = jisx0212_to_ucs4 (&endp, 1 + inchars - cnt, - 0x80); - cnt = endp - inbuf; - } - else - { - /* This is code set 1: JIS X 0208. */ - const char *endp = &inbuf[cnt - 1]; - - ch = jisx0208_to_ucs4 (&endp, 2 + inchars - cnt, - 0x80); - if (ch != L'\0') - ++cnt; - } - - if (ch == UNKNOWN_10646_CHAR) - ch = L'\0'; - - if (ch == L'\0') - --cnt; - } - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - wchar_t ch = *((wchar_t *) (inbuf + cnt)); - - if (ch <= L'\x7f') - /* It's plain ASCII. */ - outbuf[outchars] = ch; - else - { - /* Try the JIS character sets. */ - size_t found; - - found = ucs4_to_jisx0201 (ch, &outbuf[outchars]); - - if (found == UNKNOWN_10646_CHAR) - { - /* No JIS 0201 character. */ - found = ucs4_to_jisx0208 (ch, &outbuf[outchars], - (data->outbufsize - - outchars)); - if (found == 0) - { - /* We ran out of space. */ - extra = 2; - break; - } - else if (found != UNKNOWN_10646_CHAR) - { - /* It's a JIS 0208 character, adjust it for - EUC-JP. 
*/ - outbuf[outchars++] += 0x80; - outbuf[outchars] += 0x80; - } - else - { - /* No JIS 0208 character. */ - found = ucs4_to_jisx0212 (ch, &outbuf[outchars], - (data->outbufsize - - outchars)); - - if (found == 0) - { - /* We ran out of space. */ - extra = 2; - break; - } - else if (found != UNKNOWN_10646_CHAR) - { - /* It's a JIS 0212 character, adjust it for - EUC-JP. */ - outbuf[outchars++] += 0x80; - outbuf[outchars] += 0x80; - } - else - /* Illegal character. */ - break; - } - } - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_eucjp_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} +/* Definitions used in the body of the `gconv' function. 
*/ +#define CHARSET_NAME "EUC-JP" +#define FROM_LOOP from_euc_jp +#define TO_LOOP to_euc_jp +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 3 +#define MIN_NEEDED_TO 4 + + +/* First define the conversion function from EUC-JP to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + if (ch <= 0x7f) \ + ++inptr; \ + else if ((ch <= 0xa0 || ch > 0xfe) && ch != 0x8e && ch != 0x8f) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + /* Two or more byte character. First test whether the next \ + character is also available. */ \ + int ch2; \ + \ + if (NEED_LENGTH_TEST && inptr + 1 >= inend) \ + { \ + /* The second character is not available. Store the \ + intermediate result. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + \ + /* All second bytes of a multibyte character must be >= 0xa1. */ \ + if (ch2 < 0xa1) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + if (ch == 0x8e) \ + { \ + /* This is code set 2: half-width katakana. */ \ + ch = jisx0201_to_ucs4 (ch2); \ + inptr += 2; \ + } \ + else \ + { \ + const unsigned char *endp; \ + \ + if (ch == 0x8f) \ + { \ + /* This is code set 3: JIS X 0212-1990. */ \ + endp = inptr + 1; \ + \ + ch = jisx0212_to_ucs4 (&endp, \ + NEED_LENGTH_TEST ? inend - endp : 2, \ + 0x80); \ + } \ + else \ + { \ + /* This is code set 1: JIS X 0208. */ \ + endp = inptr; \ + \ + ch = jisx0208_to_ucs4 (&endp, \ + NEED_LENGTH_TEST ? inend - inptr : 2, \ + 0x80); \ + } \ + \ + if (NEED_LENGTH_TEST && ch == 0) \ + { \ + /* Not enough input available. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + if (ch == UNKNOWN_10646_CHAR) \ + { \ + /* Illegal character. 
*/ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr = endp; \ + } \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + \ + if (ch <= 0x7f) \ + /* It's plain ASCII. */ \ + *outptr++ = ch; \ + else \ + { \ + /* Try the JIS character sets. */ \ + size_t found; \ + \ + /* See whether we have room for at least two characters. */ \ + if (NEED_LENGTH_TEST && outptr + 1 >= outend) \ + { \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ + found = ucs4_to_jisx0201 (ch, outptr + 1); \ + if (found != UNKNOWN_10646_CHAR) \ + { \ + /* Yes, it's a JIS 0201 character. Store the shift byte. */ \ + *outptr = 0x8e; \ + outptr += 2; \ + } \ + else \ + { \ + /* No JIS 0201 character. */ \ + found = ucs4_to_jisx0208 (ch, outptr, 2); \ + /* Please note that we always have enough room for the output. */ \ + if (found != UNKNOWN_10646_CHAR) \ + { \ + /* It's a JIS 0208 character, adjust it for EUC-JP. */ \ + *outptr++ += 0x80; \ + *outptr++ += 0x80; \ + } \ + else \ + { \ + /* No JIS 0208 character. */ \ + found = ucs4_to_jisx0212 (ch, outptr + 1, \ + (NEED_LENGTH_TEST \ + ? outend - outptr - 1 : 2)); \ + \ + if (found == 0) \ + { \ + /* We ran out of space. */ \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + else if (found != UNKNOWN_10646_CHAR) \ + { \ + /* It's a JIS 0212 character, adjust it for EUC-JP. */ \ + *outptr++ = 0x8f; \ + *outptr++ += 0x80; \ + *outptr++ += 0x80; \ + } \ + else \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + } \ + } \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. 
*/ +#include <iconv/skeleton.c> diff --git a/iconvdata/euckr.c b/iconvdata/euckr.c index 2ad9478..50e4b40 100644 --- a/iconvdata/euckr.c +++ b/iconvdata/euckr.c @@ -1,7 +1,8 @@ /* Mapping tables for EUC-KR handling. Copyright (C) 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jungshik Shin <jshin@pantheon.yale.edu>, 1998. + Contributed by Jungshik Shin <jshin@pantheon.yale.edu> + and Ulrich Drepper <drepper@cygnus.com>, 1998. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -18,276 +19,142 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> #include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <wchar.h> #include <ksc5601.h> -/* Direction of the transformation. */ -static int to_euckr_object; -static int from_euckr_object; - static inline void -euckr_from_ucs4(wchar_t ch, unsigned char *cp) +euckr_from_ucs4 (uint32_t ch, unsigned char *cp) { if (ch > 0x7f) { - uint16_t idx=0; + uint16_t idx = 0; if (ucs4_to_ksc5601 (ch, &idx)) idx |= 0x8080; - *cp = (unsigned char) (idx/256); - *(cp+1) = (unsigned char) (idx & 0xff) ; + cp[0] = (unsigned char) (idx / 256); + cp[1] = (unsigned char) (idx & 0xff); } - /* think about 0x5c ; '\' */ + /* XXX Think about 0x5c ; '\'. */ else { - *cp = (unsigned char) (0x7f & ch) ; - *(cp+1) = (unsigned char) 0; + cp[0] = (unsigned char) ch; + cp[1] = '\0'; } } -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "EUC-KR") != NULL) - step->data = &from_euckr_object; - else if (strcasestr (step->to_name, "EUC-KR") != NULL) - step->data = &to_euckr_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. 
*/ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_euckr_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = (unsigned char) inbuf[cnt]; - wchar_t ch; - - /* - half-width Korean Currency WON sign - - if (inchar == 0x5c) - ch = 0x20a9; - else if (inchar <= 0x7f) - ch = (wchar_t) inchar; - */ - - if (inchar <= 0x7f) - ch = (wchar_t) inchar; - - -/* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are user-defined areas */ - - else if ( inchar <= 0xa0 || inchar > 0xfe || inchar == 0xc9) - /* This is illegal. */ - ch = L'\0'; - else - { - /* Two-byte character. First test whether the next - character is also available. */ - int inchar2; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. 
*/ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = (unsigned char) inbuf[++cnt]; - - ch = ksc5601_to_ucs4 ((uint16_t) (inchar * 256 + inchar2) - & 0x7f7f); - if (ch == UNKNOWN_10646_CHAR) - ch = L'\0'; - - if (ch == L'\0') - --cnt; - } - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - wchar_t ch = *((wchar_t *) (inbuf + cnt)); - unsigned char cp[2]; - -/* decomposing Hangul syllables not available in KS C 5601 into Jamos - should be considered either here or in euckr_from_ucs4() */ - - euckr_from_ucs4(ch,cp) ; - - if (cp[0] == '\0' && ch != 0) - /* Illegal character. */ - break; - - outbuf[outchars] = cp[0]; - /* Now test for a possible second byte and write this - if possible. */ - if (cp[1] != '\0') - { - if (outchars + 1 >= data->outbufsize) - { - /* The result does not fit into the buffer. */ - extra = 1; - break; - } - outbuf[++outchars] = cp[1]; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. 
*/ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_euckr_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "EUC-KR" +#define FROM_LOOP from_euc_kr +#define TO_LOOP to_euc_kr +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 + + +/* First define the conversion function from EUC-KR to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + /* Half-width Korean Currency WON sign \ + \ + if (inchar == 0x5c) \ + ch = 0x20a9; \ + else if (inchar <= 0x7f) \ + ch = (wchar_t) inchar; \ + */ \ + \ + if (ch <= 0x7f) \ + /* Plain ASCII. */ \ + ++inptr; \ + /* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are \ + user-defined areas. */ \ + else if (ch <= 0xa0 || ch > 0xfe || ch == 0xc9) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + /* Two-byte character. 
First test whether the next character \ + is also available. */ \ + int ch2; \ + \ + if (NEED_LENGTH_TEST && inptr + 1 >= inend) \ + { \ + /* The second character is not available. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + \ + if (ch2 < 0xa1 || ch2 >= 0xfe \ + || ((ch = ksc5601_to_ucs4 ((uint16_t) (ch * 256 + ch2) & 0x7f7f)) \ + == UNKNOWN_10646_CHAR)) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 2; \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + unsigned char cp[2]; \ + \ + /* Decomposing Hangul syllables not available in KS C 5601 into \ + Jamos should be considered either here or in euckr_from_ucs4() */ \ + euckr_from_ucs4 (ch, cp) ; \ + \ + if (cp[0] == '\0' && ch != 0) \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *outptr++ = cp[0]; \ + /* Now test for a possible second byte and write this if possible. */ \ + if (cp[1] != '\0') \ + { \ + if (NEED_LENGTH_TEST && outptr >= outend) \ + { \ + /* The result does not fit into the buffer. */ \ + --outptr; \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + *outptr++ = cp[1]; \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/euctw.c b/iconvdata/euctw.c index fd422c1..406dd67 100644 --- a/iconvdata/euctw.c +++ b/iconvdata/euctw.c @@ -18,302 +18,171 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ -#include <gconv.h> #include <stdint.h> -#include <string.h> -#include <wchar.h> #include <cns11643l1.h> #include <cns11643.h> -/* Direction of the transformation. */ -static int to_euctw_object; -static int from_euctw_object; - - -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "EUC-TW") != NULL) - step->data = &from_euctw_object; - else if (strcasestr (step->to_name, "EUC-TW") != NULL) - step->data = &to_euctw_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_euctw_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = (unsigned char) inbuf[cnt]; - wchar_t ch; - - if (inchar <= 0x7f) - ch = (wchar_t) inchar; - else if ((inchar <= 0xa0 || inchar > 0xfe) - && inchar != 0x8e) - /* This is illegal. 
*/ - ch = L'\0'; - else - { - /* Two or more byte character. First test whether the - next character is also available. */ - int inchar2; - - if (cnt + 1 + (inchar == 0x8e ? 2 : 0) >= inchars) - { - /* The second character is not available. Store - the intermediate result. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = (unsigned char) inbuf[++cnt]; - - /* All second bytes of a multibyte character must be - >= 0xa1. */ - if (inchar2 < 0xa1 && inchar2 == 0xff) - { - /* This is an illegal character. */ - --cnt; - result = GCONV_ILLEGAL_INPUT; - break; - } - - if (inchar == '\x8e') - { - /* This is code set 2: CNS 11643, planes 1 to 16. */ - const char *endp = &inbuf[cnt]; - - ch = cns11643_to_ucs4 (&endp, 2 + inchars - cnt, - 0x80); - - if (ch == UNKNOWN_10646_CHAR) - ch = L'\0'; - if (ch != L'\0') - cnt += 2; - } - else - { - /* This is code set 1: CNS 11643, plane 1. */ - const char *endp = &inbuf[cnt - 1]; - - ch = cns11643l1_to_ucs4 (&endp, 2 + inchars - cnt, - 0x80); - - if (ch == UNKNOWN_10646_CHAR) - ch = L'\0'; - if (ch != L'\0') - ++cnt; - } - - if (ch == L'\0') - --cnt; - } - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - wchar_t ch = *((wchar_t *) (inbuf + cnt)); - - if (ch <= L'\x7f') - /* It's plain ASCII. */ - outbuf[outchars] = ch; - else - { - /* Try the JIS character sets. */ - size_t found; - - found = ucs4_to_cns11643l1 (ch, &outbuf[outchars], - (data->outbufsize - - outchars)); - if (found == 0) - { - /* We ran out of space. 
*/ - extra = 2; - break; - } - else if (found != UNKNOWN_10646_CHAR) - { - /* It's a CNS 11643, plane 1 character, adjust it - for EUC-TW. */ - outbuf[outchars++] += 0x80; - outbuf[outchars] += 0x80; - } - else - { - /* No CNS 11643, plane 1 character. */ - outbuf[outchars] = '\x8e'; - - found = ucs4_to_cns11643 (ch, &outbuf[outchars + 1], - (data->outbufsize - - outchars - 1)); - if (found > 0) - { - /* It's a CNS 11643 character, adjust it for - EUC-TW. */ - outbuf[++outchars] += 0xa0; - outbuf[++outchars] += 0x80; - outbuf[outchars] += 0x80; - } - else if (found == 0) - { - /* We ran out of space. */ - extra = 4; - break; - } - else - /* Illegal character. */ - break; - } - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_euctw_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. 
*/ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "EUC-TW" +#define FROM_LOOP from_euc_tw +#define TO_LOOP to_euc_tw +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 + + +/* First define the conversion function from EUC-TW to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + if (ch <= 0x7f) \ + /* Plain ASCII. */ \ + ++inptr; \ + else if ((ch <= 0xa0 || ch > 0xfe) && ch != 0x8e) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + /* Two or more byte character. First test whether the next \ + character is also available. */ \ + uint32_t ch2; \ + \ + if (NEED_LENGTH_TEST && inptr + (ch == 0x8e ? 3 : 1) >= inend) \ + { \ + /* The second character is not available. Store the \ + intermediate result. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = *inptr; \ + \ + /* All second bytes of a multibyte character must be >= 0xa1. */ \ + if (ch2 < 0xa1 || ch2 == 0xff) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + if (ch == 0x8e) \ + { \ + /* This is code set 2: CNS 11643, planes 1 to 16. */ \ + const char *endp = inptr + 1; \ + \ + ch = cns11643_to_ucs4 (&endp, \ + NEED_LENGTH_TEST ? inend - inptr - 1 : 3, \ + 0x80); \ + /* Please note that we need not test for the missing input \ + characters here anymore. 
*/ \ + if (ch == UNKNOWN_10646_CHAR) \ + { \ + /* Illegal input. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 4; \ + } \ + else \ + { \ + /* This is code set 1: CNS 11643, plane 1. */ \ + const char *endp = inptr; \ + \ + ch = cns11643l1_to_ucs4 (&endp, \ + NEED_LENGTH_TEST ? inend - inptr : 2, \ + 0x80); \ + /* Please note that we need not test for the missing input \ + characters here anymore. */ \ + if (ch == UNKNOWN_10646_CHAR) \ + { \ + /* Illegal input. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 2; \ + } \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + \ + if (ch <= 0x7f) \ + /* It's plain ASCII. */ \ + *outptr++ = ch; \ + else \ + { \ + /* Try the JIS character sets. */ \ + size_t found; \ + \ + found = ucs4_to_cns11643l1 (ch, outptr, \ + NEED_LENGTH_TEST ? outend - outptr : 2); \ + if (NEED_LENGTH_TEST && found == 0) \ + { \ + /* We ran out of space. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + if (found != UNKNOWN_10646_CHAR) \ + { \ + /* It's a CNS 11643, plane 1 character, adjust it for EUC-TW. */ \ + *outptr++ += 0x80; \ + *outptr++ += 0x80; \ + } \ + else \ + { \ + /* No CNS 11643, plane 1 character. */ \ + \ + found = ucs4_to_cns11643 (ch, outptr + 1, \ + (NEED_LENGTH_TEST \ + ? outend - outptr - 1 : 3)); \ + if (NEED_LENGTH_TEST && found == 0) \ + { \ + /* We ran out of space. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + if (found == UNKNOWN_10646_CHAR) \ + { \ + /* No legal input. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* It's a CNS 11643 character, adjust it for EUC-TW. 
*/ \ + *outptr++ = '\x8e'; \ + *outptr++ += 0xa0; \ + *outptr++ += 0x80; \ + *outptr++ += 0x80; \ + } \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/gb2312.c b/iconvdata/gb2312.c index 89d7161..9cde1c8 100644 --- a/iconvdata/gb2312.c +++ b/iconvdata/gb2312.c @@ -40,7 +40,7 @@ printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const uint16_t gb2312_to_ucs[] = +const uint16_t __gb2312_to_ucs[] = { [0x0000] = 0x3000, [0x0001] = 0x3001, [0x0002] = 0x3002, [0x0003] = 0x30fb, [0x0004] = 0x02c9, [0x0005] = 0x02c7, [0x0006] = 0x00a8, [0x0007] = 0x3003, @@ -1907,7 +1907,7 @@ const uint16_t gb2312_to_ucs[] = }; -const char gb2312_from_ucs4_tab1[][2] = +const char __gb2312_from_ucs4_tab1[][2] = { [0x00] = "\x21\x68", [0x03] = "\x21\x6c", [0x04] = "\x21\x27", [0x0c] = "\x21\x63", [0x0d] = "\x21\x40", [0x33] = "\x21\x41", @@ -1939,7 +1939,7 @@ const char gb2312_from_ucs4_tab1[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char gb2312_from_ucs4_tab2[][2] = +const char __gb2312_from_ucs4_tab2[][2] = { [0x0000] = "\x26\x21", [0x0001] = "\x26\x22", [0x0002] = "\x26\x23", [0x0003] = "\x26\x24", [0x0004] = "\x26\x25", [0x0005] = "\x26\x26", @@ -1980,7 +1980,7 @@ const char gb2312_from_ucs4_tab2[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char gb2312_from_ucs4_tab3[][2] = +const char __gb2312_from_ucs4_tab3[][2] = { [0x0000] = "\x27\x27", [0x000f] = "\x27\x21", [0x0010] = "\x27\x22", [0x0011] = "\x27\x23", [0x0012] = "\x27\x24", [0x0013] = "\x27\x25", @@ -2027,7 +2027,7 @@ const char gb2312_from_ucs4_tab3[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char gb2312_from_ucs4_tab4[][2] = +const char __gb2312_from_ucs4_tab4[][2] = { [0x0000] = "\x21\x2a", [0x0003] = 
"\x21\x2e", [0x0004] = "\x21\x2f", [0x0007] = "\x21\x30", [0x0008] = "\x21\x31", [0x0011] = "\x21\x2d", @@ -2059,7 +2059,7 @@ const char gb2312_from_ucs4_tab4[][2] = But we have a problem here since U+2225 maps to either 0x212C or 0x214E. We simply choose the first solution here. */ -const char gb2312_from_ucs4_tab5[][2] = +const char __gb2312_from_ucs4_tab5[][2] = { [0x0000] = "\x21\x66", [0x0013] = "\x21\x6d", [0x005d] = "\x22\x71", [0x005e] = "\x22\x72", [0x005f] = "\x22\x73", [0x0060] = "\x22\x74", @@ -2100,7 +2100,7 @@ const char gb2312_from_ucs4_tab5[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char gb2312_from_ucs4_tab6[][2] = +const char __gb2312_from_ucs4_tab6[][2] = { [0x0000] = "\x22\x59", [0x0001] = "\x22\x5a", [0x0002] = "\x22\x5b", [0x0003] = "\x22\x5c", [0x0004] = "\x22\x5d", [0x0005] = "\x22\x5e", @@ -2142,7 +2142,7 @@ const char gb2312_from_ucs4_tab6[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char gb2312_from_ucs4_tab7[][2] = +const char __gb2312_from_ucs4_tab7[][2] = { [0x0000] = "\x21\x21", [0x0001] = "\x21\x22", [0x0002] = "\x21\x23", [0x0003] = "\x21\x28", [0x0005] = "\x21\x29", [0x0008] = "\x21\x34", @@ -2243,7 +2243,7 @@ const char gb2312_from_ucs4_tab7[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char gb2312_from_ucs4_tab8[][2] = +const char __gb2312_from_ucs4_tab8[][2] = { [0x0000] = "\x52\x3b", [0x0001] = "\x36\x21", [0x0003] = "\x46\x5f", [0x0007] = "\x4d\x72", [0x0008] = "\x55\x49", [0x0009] = "\x48\x7d", @@ -4523,7 +4523,7 @@ const char gb2312_from_ucs4_tab8[][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char gb2312_from_ucs4_tab9[][2] = +const char __gb2312_from_ucs4_tab9[][2] = { [0x0000] = "\x23\x21", [0x0001] = "\x23\x22", [0x0002] = "\x23\x23", [0x0003] = "\x21\x67", [0x0004] = "\x23\x25", [0x0005] = 
"\x23\x26", diff --git a/iconvdata/gb2312.h b/iconvdata/gb2312.h index 922fcc8..df87950 100644 --- a/iconvdata/gb2312.h +++ b/iconvdata/gb2312.h @@ -25,7 +25,7 @@ #include <stdint.h> /* Conversion table. */ -extern const uint16_t gb2312_to_ucs[]; +extern const uint16_t __gb2312_to_ucs[]; static inline wchar_t @@ -51,127 +51,166 @@ gb2312_to_ucs4 (const char **s, size_t avail, unsigned char offset) (*s) += 2; - return gb2312_to_ucs[idx] ?: ((*s) -= 2, UNKNOWN_10646_CHAR); + return __gb2312_to_ucs[idx] ?: ((*s) -= 2, UNKNOWN_10646_CHAR); } -extern const char gb2312_from_ucs4_tab1[][2]; -extern const char gb2312_from_ucs4_tab2[][2]; -extern const char gb2312_from_ucs4_tab3[][2]; -extern const char gb2312_from_ucs4_tab4[][2]; -extern const char gb2312_from_ucs4_tab5[][2]; -extern const char gb2312_from_ucs4_tab6[][2]; -extern const char gb2312_from_ucs4_tab7[][2]; -extern const char gb2312_from_ucs4_tab8[][2]; -extern const char gb2312_from_ucs4_tab9[][2]; +extern const char __gb2312_from_ucs4_tab1[][2]; +extern const char __gb2312_from_ucs4_tab2[][2]; +extern const char __gb2312_from_ucs4_tab3[][2]; +extern const char __gb2312_from_ucs4_tab4[][2]; +extern const char __gb2312_from_ucs4_tab5[][2]; +extern const char __gb2312_from_ucs4_tab6[][2]; +extern const char __gb2312_from_ucs4_tab7[][2]; +extern const char __gb2312_from_ucs4_tab8[][2]; +extern const char __gb2312_from_ucs4_tab9[][2]; static inline size_t ucs4_to_gb2312 (wchar_t wch, char *s, size_t avail) { unsigned int ch = (unsigned int) wch; char buf[2]; - const char *cp = NULL; + const char *cp = buf; - if (ch < 0xa4) - return UNKNOWN_10646_CHAR; - else if (ch < 0x101) - cp = gb2312_from_ucs4_tab1[ch - 0xa4]; - else if (ch == 0x113) - cp = "\x28\x25"; - else if (ch == 0x11b) - cp = "\x28\x27"; - else if (ch == 0x12b) - cp = "\x28\x29"; - else if (ch == 0x14d) - cp = "\x28\x2d"; - else if (ch == 0x16b) - cp = "\x28\x31"; - else if (ch == 0x1ce) - cp = "\x28\x23"; - else if (ch == 0x1d0) - cp = "\x28\x2b"; - 
else if (ch == 0x1d2) - cp = "\x28\x2f"; - else if (ch == 0x1d4) - cp = "\x28\x33"; - else if (ch == 0x1d6) - cp = "\x28\x35"; - else if (ch == 0x1d8) - cp = "\x28\x36"; - else if (ch == 0x1da) - cp = "\x28\x37"; - else if (ch == 0x1dc) - cp = "\x28\x38"; - else if (ch == 0x2c7) - cp = "\x21\x26"; - else if (ch == 0x2c9) - cp = "\x21\x25"; - else if (ch >= 0x391 && ch <= 0x3c9) - cp = gb2312_from_ucs4_tab2[ch - 0x391]; - else if (ch >= 0x401 && ch <= 0x451) - cp = gb2312_from_ucs4_tab3[ch - 0x401]; - else if (ch >= 0x2015 && ch <= 0x203b) - cp = gb2312_from_ucs4_tab4[ch - 0x2015]; - else if (ch >= 0x2103 && ch <= 0x22a5) - cp = gb2312_from_ucs4_tab5[ch - 0x2103]; - else if (ch == 0x2313) - cp = "\x21\x50"; - else if (ch >= 0x2460 && ch <= 0x249b) - cp = gb2312_from_ucs4_tab6[ch - 0x2460]; - else if (ch >= 0x2500 && ch <= 0x254b) + switch (ch) { + case 0xa4 ... 0x100: + cp = __gb2312_from_ucs4_tab1[ch - 0xa4]; + break; + case 0x113: + cp = "\x28\x25"; + break; + case 0x11b: + cp = "\x28\x27"; + break; + case 0x12b: + cp = "\x28\x29"; + break; + case 0x14d: + cp = "\x28\x2d"; + break; + case 0x16b: + cp = "\x28\x31"; + break; + case 0x1ce: + cp = "\x28\x23"; + break; + case 0x1d0: + cp = "\x28\x2b"; + break; + case 0x1d2: + cp = "\x28\x2f"; + break; + case 0x1d4: + cp = "\x28\x33"; + break; + case 0x1d6: + cp = "\x28\x35"; + break; + case 0x1d8: + cp = "\x28\x36"; + break; + case 0x1da: + cp = "\x28\x37"; + break; + case 0x1dc: + cp = "\x28\x38"; + break; + case 0x2c7: + cp = "\x21\x26"; + break; + case 0x2c9: + cp = "\x21\x25"; + break; + case 0x391 ... 0x3c9: + cp = __gb2312_from_ucs4_tab2[ch - 0x391]; + break; + case 0x401 ... 0x451: + cp = __gb2312_from_ucs4_tab3[ch - 0x401]; + break; + case 0x2015 ... 0x203b: + cp = __gb2312_from_ucs4_tab4[ch - 0x2015]; + break; + case 0x2103 ... 0x22a5: + cp = __gb2312_from_ucs4_tab5[ch - 0x2103]; + break; + case 0x2313: + cp = "\x21\x50"; + break; + case 0x2460 ... 
0x249b: + cp = __gb2312_from_ucs4_tab6[ch - 0x2460]; + break; + case 0x2500 ... 0x254b: buf[0] = '\x29'; buf[1] = '\x24' + (ch & 256); - cp = buf; - } - else if (ch == 0x25a0) - cp = "\x21\x76"; - else if (ch == 0x25a1) - cp = "\x21\x75"; - else if (ch == 0x25b2) - cp = "\x21\x78"; - else if (ch == 0x25b3) - cp = "\x21\x77"; - else if (ch == 0x25c6) - cp = "\x21\x74"; - else if (ch == 0x25c7) - cp = "\x21\x73"; - else if (ch == 0x25cb) - cp = "\x21\x70"; - else if (ch == 0x25ce) - cp = "\x21\x72"; - else if (ch == 0x25cf) - cp = "\x21\x71"; - else if (ch == 0x2605) - cp = "\x21\x6f"; - else if (ch == 0x2606) - cp = "\x21\x6e"; - else if (ch == 0x2640) - cp = "\x21\x62"; - else if (ch == 0x2642) - cp = "\x21\x61"; - else if (ch >= 0x3000 && ch <= 0x3129) - cp = gb2312_from_ucs4_tab7[ch - 0x3000]; - else if (ch >= 0x3220 && ch <= 0x3229) - { + break; + case 0x25a0: + cp = "\x21\x76"; + break; + case 0x25a1: + cp = "\x21\x75"; + break; + case 0x25b2: + cp = "\x21\x78"; + break; + case 0x25b3: + cp = "\x21\x77"; + break; + case 0x25c6: + cp = "\x21\x74"; + break; + case 0x25c7: + cp = "\x21\x73"; + break; + case 0x25cb: + cp = "\x21\x70"; + break; + case 0x25ce: + cp = "\x21\x72"; + break; + case 0x25cf: + cp = "\x21\x71"; + break; + case 0x2605: + cp = "\x21\x6f"; + break; + case 0x2606: + cp = "\x21\x6e"; + break; + case 0x2640: + cp = "\x21\x62"; + break; + case 0x2642: + cp = "\x21\x61"; + break; + case 0x3000 ... 0x3129: + cp = __gb2312_from_ucs4_tab7[ch - 0x3000]; + break; + case 0x3220 ... 0x3229: buf[0] = '\x22'; buf[1] = '\x65' + (ch - 0x3220); - cp = buf; + break; + case 0x4e00 ... 0x9fa0: + cp = __gb2312_from_ucs4_tab8[ch - 0x4e00]; + break; + case 0xff01 ... 
0xff5e: + cp = __gb2312_from_ucs4_tab9[ch - 0xff01]; + break; + case 0xffe0: + cp = "\x21\x69"; + break; + case 0xffe1: + cp = "\x21\x6a"; + break; + case 0xffe3: + cp = "\x23\x7e"; + break; + case 0xffe5: + cp = "\x23\x24"; + break; + default: + return UNKNOWN_10646_CHAR; } - else if (ch >= 0x4e00 && ch <= 0x9fa0) - cp = gb2312_from_ucs4_tab8[ch - 0x4e00]; - else if (ch >= 0xff01 && ch <= 0xff5e) - cp = gb2312_from_ucs4_tab9[ch - 0xff01]; - else if (ch == 0xffe0) - cp = "\x21\x69"; - else if (ch == 0xffe1) - cp = "\x21\x6a"; - else if (ch == 0xffe3) - cp = "\x23\x7e"; - else if (ch == 0xffe5) - cp = "\x23\x24"; - else - return UNKNOWN_10646_CHAR; if (cp[1] != '\0' && avail < 2) return 0; diff --git a/iconvdata/hp-roman8.c b/iconvdata/hp-roman8.c index db36cea..62e29ea 100644 --- a/iconvdata/hp-roman8.c +++ b/iconvdata/hp-roman8.c @@ -1,5 +1,5 @@ /* Conversion from and to HP-ROMAN8. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,12 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +#include <stdint.h> + +/* Get the conversion table. */ #include <hp-roman8.h> -#define NAME "HP-ROMAN8" + +#define CHARSET_NAME "HP-ROMAN8" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c index 53ca76c..3c40c8f 100644 --- a/iconvdata/iso646.c +++ b/iconvdata/iso646.c @@ -292,19 +292,17 @@ gconv (struct gconv_step *step, struct gconv_step_data *data, /* Correct the output buffer. 
*/ if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } + memmove (data->outbuf, + &data->outbuf[data->outbufavail - newavail], + newavail); + data->outbufavail = newavail; } } while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); } if (written != NULL && data->is_last) - *written = do_write; + *written += do_write; return result; } diff --git a/iconvdata/iso6937.c b/iconvdata/iso6937.c index 21e3ab4..b121ffa 100644 --- a/iconvdata/iso6937.c +++ b/iconvdata/iso6937.c @@ -18,11 +18,10 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> -#include <string.h> +#include <stdint.h> /* Data taken from the WG15 tables. */ -static const wchar_t to_ucs4[256] = +static const uint32_t to_ucs4[256] = { /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, @@ -60,7 +59,7 @@ static const wchar_t to_ucs4[256] = /* The outer array range runs from 0xc1 to 0xcf, the inner range from 0x20 to 0x7f. */ -static const wchar_t to_ucs4_comb[15][96] = +static const uint32_t to_ucs4_comb[15][96] = { /* 0xc1 */ { @@ -371,290 +370,179 @@ static const char from_ucs4[][2] = */ }; -/* Direction of the transformation. */ -static int to_iso6937_object; -static int from_iso6937_object; +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "ISO_6937" +#define FROM_LOOP from_iso6937 +#define TO_LOOP to_iso6937 +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 + + +/* First define the conversion function from ISO 6937 to UCS4. 
*/ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + if (ch >= 0xc1 && ch <= 0xcf) \ + { \ + /* Composed character. First test whether the next character \ + is also available. */ \ + int ch2; \ + \ + if (inptr + 1 >= inend) \ + { \ + /* The second character is not available. Store the \ + intermediate result. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + \ + if (ch2 < 0x20 || ch2 >= 0x80) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ch = to_ucs4_comb[ch - 0xc1][ch2 - 0x20]; \ + \ + if (ch == 0) \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 2; \ + } \ + else \ + { \ + ch = to_ucs4[ch]; \ + \ + if (ch == 0 && *inptr != '\0') \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + ++inptr; \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + char tmp[2]; \ + uint32_t ch = *((uint32_t *) inptr); \ + const char *cp; \ + \ + if (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0])) \ + { \ + int fail = 0; \ + switch (ch) \ + { \ + case 0x2c7: \ + cp = "\xcf\x20"; \ + break; \ + case 0x2d8 ... 
0x2dd: \ + { \ + static const char map[5] = "\xc6\xc7\xca\xce\xcd"; \ + \ + tmp[0] = map[ch - 0x2d8]; \ + tmp[1] = ' '; \ + cp = tmp; \ + } \ + break; \ + case 0x2014: \ + cp = "\xd0"; \ + break; \ + case 0x2018: \ + cp = "\xa9"; \ + break; \ + case 0x2019: \ + cp = "\xb9"; \ + break; \ + case 0x201c: \ + cp = "\xaa"; \ + break; \ + case 0x201d: \ + cp = "\xba"; \ + break; \ + case 0x2122: \ + cp = "\xd4"; \ + break; \ + case 0x2126: \ + cp = "\xe0"; \ + break; \ + case 0x215b ... 0x215e: \ + tmp[0] = 0xdc + (ch - 0x215b); \ + tmp[1] = '\0'; \ + cp = tmp; \ + break; \ + case 0x2190 ... 0x2193: \ + tmp[0] = 0xac + (ch - 0x2190); \ + tmp[1] = '\0'; \ + cp = tmp; \ + break; \ + case 0x266a: \ + cp = "\xd5"; \ + break; \ + default: \ + cp = NULL; \ + fail = 1; \ + } \ + \ + if (fail) \ + { \ + /* Illegal characters. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + } \ + else if (from_ucs4[ch][0] == '\0' && ch != 0) \ + { \ + /* Illegal characters. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + cp = from_ucs4[ch]; \ + \ + *outptr++ = cp[0]; \ + /* Now test for a possible second byte and write this if possible. */ \ + if (cp[1] != '\0') \ + { \ + if (NEED_LENGTH_TEST && outptr >= outend) \ + { \ + /* The result does not fit into the buffer. */ \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + *outptr++ = cp[1]; \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "ISO_6937") != NULL) - step->data = &from_iso6937_object; - else if (strcasestr (step->to_name, "ISO_6937") != NULL) - step->data = &to_iso6937_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothign to do. 
*/ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_iso6937_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = inbuf[cnt]; - wchar_t ch; - - if (inchar >= '\xc1' && inchar <= '\xcf') - { - /* Composed character. First test whether the next - character is also available. */ - int inchar2; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = inbuf[++cnt]; - - if (inchar2 < '\x20' || inchar2 >= '\x80') - /* This is illegal. */ - ch = L'\0'; - else - ch = to_ucs4_comb[inchar - 0xc1][inchar2 - 0x20]; - - if (ch == L'\0') - /* Undo the increment for illegal characters. */ - --cnt; - } - else - ch = to_ucs4[inchar]; - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. 
*/ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - char tmp[2]; - int ch = *((wchar_t *) (inbuf + cnt)); - const char *cp; - - if (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0])) - { - int fail = 0; - switch (ch) - { - case 0x2c7: - cp = "\xcf\x20"; - break; - case 0x2d8 ... 0x2dd: - { - static const char map[5] = "\xc6\xc7\xca\xce\xcd"; - - tmp[0] = map[ch - 0x2d8]; - tmp[1] = ' '; - cp = tmp; - } - break; - case 0x2014: - cp = "\xd0"; - break; - case 0x2018: - cp = "\xa9"; - break; - case 0x2019: - cp = "\xb9"; - break; - case 0x201c: - cp = "\xaa"; - break; - case 0x201d: - cp = "\xba"; - break; - case 0x2122: - cp = "\xd4"; - break; - case 0x2126: - cp = "\xe0"; - break; - case 0x215b ... 0x215e: - tmp[0] = 0xdc + (ch - 0x215b); - tmp[1] = '\0'; - cp = tmp; - break; - case 0x2190 ... 0x2193: - tmp[0] = 0xac + (ch - 0x2190); - tmp[1] = '\0'; - cp = tmp; - break; - case 0x266a: - cp = "\xd5"; - break; - default: - cp = NULL; - fail = 1; - } - - if (fail) - /* Illegal characters. */ - break; - } - else if (ch < 0 || (from_ucs4[ch][0] == '\0' && ch != 0)) - break; - else - cp = from_ucs4[ch]; - - outbuf[outchars] = cp[0]; - /* Now test for a possible second byte and write this - if possible. */ - if (cp[1] != '\0') - { - if (outchars + 1 >= data->outbufsize) - { - /* The result does not fit into the buffer. 
*/ - extra = 1; - break; - } - outbuf[++outchars] = cp[1]; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_iso6937_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - return result; -} +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/iso8859-1.c b/iconvdata/iso8859-1.c index b9484a0..3e50b79 100644 --- a/iconvdata/iso8859-1.c +++ b/iconvdata/iso8859-1.c @@ -18,179 +18,44 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> -#include <string.h> - -/* Direction of the transformation. 
*/ -static int to_iso88591_object; -static int from_iso88591_object; - - -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "ISO-8859-1") != NULL) - step->data = &from_iso88591_object; - else if (strcasestr (step->to_name, "ISO-8859-1") != NULL) - step->data = &to_iso88591_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. 
*/ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_iso88591_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - *((wchar_t *) (outbuf + outwchars)) = - (unsigned char) inbuf[cnt]; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - if (*((wchar_t *) (inbuf + cnt)) >= L'\0' - && *((wchar_t *) (inbuf + cnt)) <= L'\377') - outbuf[outchars] = *((wchar_t *) (inbuf + cnt)); - else - /* Here is where the transliteration would enter the - scene. */ - break; - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_iso88591_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. 
*/ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} +#include <stdint.h> + +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "ISO-8859-1" +#define FROM_LOOP from_iso8859_1 +#define TO_LOOP to_iso8859_1 +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 + +/* First define the conversion function from ISO 8859-1 to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + *((uint32_t *) outptr)++ = *inptr++; +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + if (ch > 0xff) \ + { \ + /* We have an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + *outptr++ = (unsigned char) ch; \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/iso8859-10.c b/iconvdata/iso8859-10.c index eb54e49..acce325 100644 --- a/iconvdata/iso8859-10.c +++ b/iconvdata/iso8859-10.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-10. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
@@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-10.h> -#define NAME "ISO-8859-10" + +#define CHARSET_NAME "ISO-8859-10" +#define HAS_HOLES 0 /* All 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/iso8859-2.c b/iconvdata/iso8859-2.c index fa07b752..8a5e624 100644 --- a/iconvdata/iso8859-2.c +++ b/iconvdata/iso8859-2.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-2. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-2.h> -#define NAME "ISO-8859-2" + +#define CHARSET_NAME "ISO-8859-2" +#define HAS_HOLES 0 /* All 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/iso8859-3.c b/iconvdata/iso8859-3.c index c31d388..10e52e4 100644 --- a/iconvdata/iso8859-3.c +++ b/iconvdata/iso8859-3.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-3. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-3.h> -#define NAME "ISO-8859-3" + +#define CHARSET_NAME "ISO-8859-3" +#define HAS_HOLES 1 /* Not all 256 character are defined. 
*/ + #include <8bit-generic.c> diff --git a/iconvdata/iso8859-4.c b/iconvdata/iso8859-4.c index 7ae9847..01cdbf0 100644 --- a/iconvdata/iso8859-4.c +++ b/iconvdata/iso8859-4.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-4. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-4.h> -#define NAME "ISO-8859-4" + +#define CHARSET_NAME "ISO-8859-4" +#define HAS_HOLES 0 /* All 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/iso8859-5.c b/iconvdata/iso8859-5.c index b4791f1..edf0f47 100644 --- a/iconvdata/iso8859-5.c +++ b/iconvdata/iso8859-5.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-5. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-5.h> -#define NAME "ISO-8859-5" + +#define CHARSET_NAME "ISO-8859-5" +#define HAS_HOLES 0 /* All 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/iso8859-6.c b/iconvdata/iso8859-6.c index 1e88ec9..59ce3f2 100644 --- a/iconvdata/iso8859-6.c +++ b/iconvdata/iso8859-6.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-6. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
@@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-6.h> -#define NAME "ISO-8859-6" + +#define CHARSET_NAME "ISO-8859-6" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/iso8859-7.c b/iconvdata/iso8859-7.c index 63220bb..d75e93e 100644 --- a/iconvdata/iso8859-7.c +++ b/iconvdata/iso8859-7.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-7. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-7.h> -#define NAME "ISO-8859-7" + +#define CHARSET_NAME "ISO-8859-7" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/iso8859-8.c b/iconvdata/iso8859-8.c index 2246ae4..1612f14 100644 --- a/iconvdata/iso8859-8.c +++ b/iconvdata/iso8859-8.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-8. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-8.h> -#define NAME "ISO-8859-8" + +#define CHARSET_NAME "ISO-8859-8" +#define HAS_HOLES 1 /* Not all 256 character are defined. 
*/ + #include <8bit-generic.c> diff --git a/iconvdata/iso8859-9.c b/iconvdata/iso8859-9.c index 5260362..063f189 100644 --- a/iconvdata/iso8859-9.c +++ b/iconvdata/iso8859-9.c @@ -1,5 +1,5 @@ /* Conversion from and to ISO 8859-9. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <iso8859-9.h> -#define NAME "ISO-8859-9" + +#define CHARSET_NAME "ISO-8859-9" +#define HAS_HOLES 0 /* All 256 character are defined. */ + #include <8bit-generic.c> diff --git a/iconvdata/jis0201.c b/iconvdata/jis0201.c index e8e04e1..65321e4 100644 --- a/iconvdata/jis0201.c +++ b/iconvdata/jis0201.c @@ -1,5 +1,5 @@ /* Mapping tables for JIS0201 handling. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,10 +18,10 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +#include <stdint.h> -const wchar_t jisx0201_to_ucs4[256] = +const uint32_t __jisx0201_to_ucs4[256] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, diff --git a/iconvdata/jis0201.h b/iconvdata/jis0201.h index 8f920f9..1514c88 100644 --- a/iconvdata/jis0201.h +++ b/iconvdata/jis0201.h @@ -22,13 +22,13 @@ #define _JIS0201_H 1 /* Conversion table. 
*/ -extern const wchar_t jis0201_to_ucs4[]; +extern const uint32_t __jis0201_to_ucs4[]; -static inline wchar_t +static inline uint32_t jisx0201_to_ucs4 (char ch) { - wchar_t val = jis0201_to_ucs4[(unsigned char) ch]; + uint32_t val = __jis0201_to_ucs4[(unsigned char) ch]; if (val == 0 && ch != '\0') val = UNKNOWN_10646_CHAR; @@ -38,7 +38,7 @@ jisx0201_to_ucs4 (char ch) static inline size_t -ucs4_to_jisx0201 (wchar_t wch, char *s) +ucs4_to_jisx0201 (uint32_t wch, char *s) { char ch; diff --git a/iconvdata/jis0208.c b/iconvdata/jis0208.c index 8db4085..964f73d 100644 --- a/iconvdata/jis0208.c +++ b/iconvdata/jis0208.c @@ -1,5 +1,5 @@ /* Mapping tables for JIS0208 handling. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,7 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ -#include <wchar.h> +#include <stdint.h> #include "jis0208.h" @@ -58,7 +58,7 @@ printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const uint16_t jis0208_to_ucs[0x1e80] = +const uint16_t __jis0208_to_ucs[0x1e80] = { [0x0000] = 0x3000, [0x0001] = 0x3001, [0x0002] = 0x3002, [0x0003] = 0xff0c, [0x0004] = 0xff0e, [0x0005] = 0x30fb, [0x0006] = 0xff1a, [0x0007] = 0xff1b, @@ -1783,7 +1783,7 @@ const uint16_t jis0208_to_ucs[0x1e80] = }; -const char jisx0208_from_ucs4_lat1[256][2] = +const char __jisx0208_from_ucs4_lat1[256][2] = { [0x005C] = "\x21\x40", [0x00A2] = "\x21\x71", [0x00A3] = "\x21\x72", [0x00A7] = "\x21\x78", [0x00A8] = "\x21\x2f", [0x00AC] = "\x22\x4c", @@ -1814,7 +1814,7 @@ const char jisx0208_from_ucs4_lat1[256][2] = printf ("\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char jisx0208_from_ucs4_greek[0xc1][2] = +const char __jisx0208_from_ucs4_greek[0xc1][2] = { [0x00] = "\x26\x21", [0x01] = "\x26\x22", [0x02] = "\x26\x23", [0x03] = "\x26\x24", [0x04] = "\x26\x25", [0x05] = "\x26\x26", @@ -1887,7 +1887,7 @@ const char jisx0208_from_ucs4_greek[0xc1][2] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const struct jisx0208_ucs_idx jisx0208_from_ucs_idx[] = +const struct jisx0208_ucs_idx __jisx0208_from_ucs_idx[] = { { start: 0x2010, end: 0x2026, idx: 0 }, { start: 0x2030, end: 0x2033, idx: 23 }, @@ -2596,7 +2596,7 @@ const struct jisx0208_ucs_idx jisx0208_from_ucs_idx[] = { start: 0x9f9c, end: 0x9fa0, idx: 14109 }, { start: 0xff01, end: 0xff5d, idx: 14114 }, { start: 0xffe3, end: 0xffe5, idx: 14207 }, - { start: 0 } + { start: 0xffff, end: 0xffff, idx: 0 } }; @@ -2637,7 +2637,7 @@ const struct jisx0208_ucs_idx jisx0208_from_ucs_idx[] = } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char jisx0208_from_ucs_tab[14210][2] = +const char __jisx0208_from_ucs_tab[14210][2] = { "\x20\x10", "\x00\x00", "\x00\x00", 
"\x00\x00", "\x00\x00", "\x20\x15", "\x20\x16", "\x00\x00", "\x20\x18", "\x20\x19", "\x00\x00", "\x00\x00", diff --git a/iconvdata/jis0208.h b/iconvdata/jis0208.h index 94d2764..a83ee3b 100644 --- a/iconvdata/jis0208.h +++ b/iconvdata/jis0208.h @@ -25,12 +25,12 @@ #include <stdint.h> /* Conversion table. */ -extern const uint16_t jis0208_to_ucs[]; +extern const uint16_t __jis0208_to_ucs[]; -extern const char jisx0208_from_ucs4_lat1[256][2]; -extern const char jisx0208_from_ucs4_greek[0xc1][2]; -extern const struct jisx0208_ucs_idx jisx0208_from_ucs_idx[]; -extern const char jisx0208_from_ucs_tab[][2]; +extern const char __jisx0208_from_ucs4_lat1[256][2]; +extern const char __jisx0208_from_ucs4_greek[0xc1][2]; +extern const struct jisx0208_ucs_idx __jisx0208_from_ucs_idx[]; +extern const char __jisx0208_from_ucs_tab[][2]; /* Struct for table with indeces in UCS mapping table. */ @@ -42,8 +42,8 @@ struct jisx0208_ucs_idx }; -static inline wchar_t -jisx0208_to_ucs4 (const char **s, size_t avail, unsigned char offset) +static inline uint32_t +jisx0208_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) { unsigned char ch = *(*s); unsigned char ch2; @@ -65,34 +65,38 @@ jisx0208_to_ucs4 (const char **s, size_t avail, unsigned char offset) (*s) += 2; - return jis0208_to_ucs[idx] ?: ((*s) -= 2, UNKNOWN_10646_CHAR); + return __jis0208_to_ucs[idx] ?: ((*s) -= 2, UNKNOWN_10646_CHAR); } static inline size_t -ucs4_to_jisx0208 (wchar_t wch, char *s, size_t avail) +ucs4_to_jisx0208 (uint32_t wch, char *s, size_t avail) { unsigned int ch = (unsigned int) wch; - const char *cp = NULL; + const char *cp; if (avail < 2) return 0; if (ch < 0x100) - cp = jisx0208_from_ucs4_lat1[ch]; + cp = __jisx0208_from_ucs4_lat1[ch]; else if (ch >= 0x391 && ch <= 0x451) - cp = jisx0208_from_ucs4_greek[ch]; + cp = __jisx0208_from_ucs4_greek[ch]; else { - const struct jisx0208_ucs_idx *rp = jisx0208_from_ucs_idx; + const struct jisx0208_ucs_idx *rp = __jisx0208_from_ucs_idx; + if (ch 
>= 0xffff) + return UNKNOWN_10646_CHAR; while (ch > rp->end) ++rp; if (ch >= rp->start) - cp = jisx0208_from_ucs_tab[rp->idx + ch - rp->start]; + cp = __jisx0208_from_ucs_tab[rp->idx + ch - rp->start]; + else + return UNKNOWN_10646_CHAR; } - if (cp == NULL || cp[0] == '\0') + if (cp[0] == '\0') return UNKNOWN_10646_CHAR; s[0] = cp[0]; diff --git a/iconvdata/jis0212.c b/iconvdata/jis0212.c index 7484c5e..eca645f 100644 --- a/iconvdata/jis0212.c +++ b/iconvdata/jis0212.c @@ -19,7 +19,6 @@ Boston, MA 02111-1307, USA. */ #include <stdint.h> -#include <wchar.h> #include <jis0212.h> @@ -54,7 +53,7 @@ $first, $last, $idx); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const struct jisx0212_idx jisx0212_to_ucs_idx[] = +const struct jisx0212_idx __jisx0212_to_ucs_idx[] = { { start: 0x006c, end: 0x0076, idx: 0 }, { start: 0x007f, end: 0x0081, idx: 11 }, @@ -67,7 +66,7 @@ const struct jisx0212_idx jisx0212_to_ucs_idx[] = { start: 0x034e, end: 0x03a4, idx: 107 }, { start: 0x03ac, end: 0x0402, idx: 194 }, { start: 0x0582, end: 0x1c2a, idx: 281 }, - { start: 0 }, + { start: 0xffff, end: 0xffff, idx: 0 }, }; @@ -109,7 +108,7 @@ const struct jisx0212_idx jisx0212_to_ucs_idx[] = } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const uint16_t jisx0212_to_ucs[] = +const uint16_t __jisx0212_to_ucs[] = { 0x02d8, 0x02c7, 0x00b8, 0x02d9, 0x02dd, 0x00af, 0x02db, 0x02da, 0x007e, 0x0384, 0x0385, 0x00a1, 0x00a6, 0x00bf, 0x00ba, 0x00aa, @@ -905,7 +904,7 @@ const uint16_t jisx0212_to_ucs[] = $first, $last, $idx); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const struct jisx0212_idx jisx0212_from_ucs_idx[] = +const struct jisx0212_idx __jisx0212_from_ucs_idx[] = { { start: 0x007e, end: 0x007e, idx: 0 }, { start: 0x00a1, end: 0x00af, idx: 1 }, @@ -1654,7 +1653,7 @@ const struct jisx0212_idx jisx0212_from_ucs_idx[] = { start: 0x9f68, end: 0x9f7d, idx: 13393 }, { start: 0x9f8f, end: 0x9f97, idx: 13415 }, { 
start: 0x9f9e, end: 0x9fa5, idx: 13424 }, - { start: 0 } + { start: 0xffff, end: 0xffff, idx: 0 } }; @@ -1697,7 +1696,7 @@ const struct jisx0212_idx jisx0212_from_ucs_idx[] = } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -const char jisx0212_from_ucs[][2] = +const char __jisx0212_from_ucs[][2] = { "\x7e\x00", "\xa1\x00", "\x00\x00", "\x00\x00", "\xa4\x00", "\x00\x00", "\xa6\x00", "\x00\x00", "\x00\x00", "\xa9\x00", "\xaa\x00", "\x00\x00", diff --git a/iconvdata/jis0212.h b/iconvdata/jis0212.h index cf5d9191..4930437 100644 --- a/iconvdata/jis0212.h +++ b/iconvdata/jis0212.h @@ -34,20 +34,20 @@ struct jisx0212_idx }; /* Conversion table. */ -extern const struct jisx0212_idx jisx0212_to_ucs_idx[]; -extern const uint16_t jisx0212_to_ucs[]; +extern const struct jisx0212_idx __jisx0212_to_ucs_idx[]; +extern const uint16_t __jisx0212_to_ucs[]; -extern const struct jisx0212_idx jisx0212_from_ucs_idx[]; -extern const char jisx0212_from_ucs[][2]; +extern const struct jisx0212_idx __jisx0212_from_ucs_idx[]; +extern const char __jisx0212_from_ucs[][2]; static inline wchar_t -jisx0212_to_ucs4 (const char **s, size_t avail, unsigned char offset) +jisx0212_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) { - const struct jisx0212_idx *rp = jisx0212_to_ucs_idx; + const struct jisx0212_idx *rp = __jisx0212_to_ucs_idx; unsigned char ch = *(*s); unsigned char ch2; - wchar_t wch = L'\0'; + uint32_t wch = 0; int idx; if (ch < offset || (ch - offset) <= 0x6d || (ch - offset) > 0xea) @@ -62,10 +62,10 @@ jisx0212_to_ucs4 (const char **s, size_t avail, unsigned char offset) idx = (ch - 0x21 - offset) * 94 + (ch2 - 0x21 - offset); - while (idx < rp->start) + while (idx > rp->end) ++rp; - if (idx <= rp->end) - wch = jisx0212_to_ucs[rp->idx + idx - rp->start]; + if (idx >= rp->start) + wch = __jisx0212_to_ucs[rp->idx + idx - rp->start]; if (wch != L'\0') (*s) += 2; @@ -79,16 +79,20 @@ jisx0212_to_ucs4 (const char **s, size_t avail, unsigned 
char offset) static inline size_t ucs4_to_jisx0212 (wchar_t wch, char *s, size_t avail) { - const struct jisx0212_idx *rp = jisx0212_from_ucs_idx; + const struct jisx0212_idx *rp = __jisx0212_from_ucs_idx; unsigned int ch = (unsigned int) wch; - const char *cp = NULL; + const char *cp; + if (ch >= 0xffff) + return UNKNOWN_10646_CHAR; while (ch > rp->end) ++rp; if (ch >= rp->start) - cp = jisx0212_from_ucs[rp->idx + ch - rp->start]; + cp = __jisx0212_from_ucs[rp->idx + ch - rp->start]; + else + return UNKNOWN_10646_CHAR; - if (cp == NULL || cp[0] == '\0') + if (cp[0] == '\0') return UNKNOWN_10646_CHAR; s[0] = cp[0]; diff --git a/iconvdata/johab.c b/iconvdata/johab.c index c9912a7..6a582c0 100644 --- a/iconvdata/johab.c +++ b/iconvdata/johab.c @@ -1,7 +1,8 @@ /* Mapping tables for JOHAB handling. Copyright (C) 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jungshik Shin <jshin@pantheon.yale.edu>, 1998. + Contributed by Jungshik Shin <jshin@pantheon.yale.edu> + and Ulrich Drepper <drepper@cygnus.com>, 1998. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -18,16 +19,9 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> #include <stdint.h> -#include <string.h> -#include <wchar.h> #include <ksc5601.h> -/* Direction of the transformation. 
*/ -static int to_johab_object; -static int from_johab_object; - /* The table for Bit pattern to Hangul Jamo 5 bits each are used to encode leading consonants(19 + 1 filler), medial vowels(21 + 1 filler) @@ -37,19 +31,19 @@ static int from_johab_object; 0 : Filler, -1: invalid, >= 1 : valid */ -const int init[32] = +static const int init[32] = { -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; -const int mid[32] = +static const int mid[32] = { -1, -1, 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1, 12, 13, 14, 15, 16, 17, -1, -1, 18, 19, 20, 21, -1, -1 }; -const int final[32] = +static const int final[32] = { -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, -1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, -1, -1 @@ -63,14 +57,14 @@ const int final[32] = block [0x3131,0x314e] or Hangul Conjoining Jamo block, [0x1100,0x11ff] */ -const wchar_t init_to_ucs[19] = +static const uint32_t init_to_ucs[19] = { 0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142, 0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b, 0x314c, 0x314d, 0x314e }; -const wchar_t final_to_ucs[27] = +static const uint32_t final_to_ucs[27] = { L'\0', L'\0', 0x3133, L'\0', 0x3135, 0x3136, L'\0', L'\0', 0x313a, 0x313b, 0x314c, 0x313d, 0x313e, 0x313f, @@ -88,7 +82,7 @@ const wchar_t final_to_ucs[27] = to get the same result arithmetically. 
*/ -const int init_to_bit[19] = +static const int init_to_bit[19] = { 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, @@ -96,7 +90,7 @@ const int init_to_bit[19] = 0xd000 }; -const int mid_to_bit[21] = +static const int mid_to_bit[21] = { 0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0, 0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x1e0, @@ -104,7 +98,7 @@ const int mid_to_bit[21] = 0x0340, 0x0360, 0x0380, 0x03a0 }; -const int final_to_bit[28] = +static const int final_to_bit[28] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d @@ -118,7 +112,7 @@ const int final_to_bit[28] = 2. Unicode 2.0 manual */ -const uint16_t jamo_from_ucs_table[51] = +static const uint16_t jamo_from_ucs_table[51] = { 0x8841, 0x8c41, 0x8444, @@ -137,21 +131,20 @@ const uint16_t jamo_from_ucs_table[51] = }; -static inline wchar_t -johab_sym_hanja_to_ucs (int idx, int c1, int c2) +static inline uint32_t +johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2) { if (idx <= 0xdefe) - return (wchar_t) ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2 - - (c2 > 0x90 ? 0x43 : 0x31)]; + return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2 + - (c2 > 0x90 ? 0x43 : 0x31)]; else - return (wchar_t) ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2 - - (c2 > 0x90 ? 0x43 : 0x31)]; + return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2 + - (c2 > 0x90 ? 
0x43 : 0x31)]; } static uint16_t -johab_hanja_from_ucs (wchar_t ch) +johab_hanja_from_ucs (uint32_t ch) { - uint16_t idx; if (ucs4_to_ksc5601_hanja (ch, &idx)) { @@ -168,7 +161,7 @@ johab_hanja_from_ucs (wchar_t ch) } static uint16_t -johab_sym_from_ucs (wchar_t ch) +johab_sym_from_ucs (uint32_t ch) { uint16_t idx; if (ucs4_to_ksc5601_sym (ch, &idx)) @@ -186,9 +179,8 @@ johab_sym_from_ucs (wchar_t ch) } - static inline void -johab_from_ucs4 (wchar_t ch, unsigned char *cp) +johab_from_ucs4 (uint32_t ch, unsigned char *cp) { if (ch >= 0x7f) { @@ -215,315 +207,205 @@ johab_from_ucs4 (wchar_t ch, unsigned char *cp) else idx = johab_sym_from_ucs (ch); - *cp = (char) (idx / 256); - *(cp + 1) = (char) (idx & 0xff); + cp[0] = (unsigned char) (idx / 256); + cp[1] = (unsigned char) (idx & 0xff); } else { - *cp = (char) (0x7f & ch); - *(cp + 1) = (char) 0; + cp[0] = (unsigned char) ch; + cp[1] = 0; } - -} - - -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "JOHAB") != NULL) - step->data = &from_johab_object; - else if (strcasestr (step->to_name, "JOHAB") != NULL) - step->data = &to_johab_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ } -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t * written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. 
*/ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_johab_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = (unsigned char) inbuf[cnt]; - wchar_t ch; - /* half-width Korean Currency WON sign - if (inchar == 0x5c) - ch = 0x20a9; - else if (inchar < 0x7f) - ch = (wchar_t) inchar; - */ - if (inchar < 0x7f) - ch = (wchar_t) inchar; - - /* Johab : 1. Hangul - 1st byte : 0x84-0xd3 - 2nd byte : 0x41-0x7e, 0x81-0xfe - 2. Hanja & Symbol : - 1st byte : 0xd8-0xde, 0xe0-0xf9 - 2nd byte : 0x31-0x7e, 0x91-0xfe - 0xd831-0xd87e and 0xd891-0xd8fe are user-defined area */ - - else if (inchar > 0xf9 || inchar == 0xdf - || (inchar > 0x7e && inchar < 0x84) - || (inchar > 0xd3 && inchar < 0xd9)) - /* These are illegal. */ - ch = L'\0'; - else - { - /* Two-byte character. First test whether the next - character is also available. */ - int inchar2; - int idx; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = (unsigned char) inbuf[++cnt]; - idx = inchar * 256 + inchar2; - if (inchar <= 0xd3) - { /* Hangul */ - int i, m, f; - i = init[(idx & 0x7c00) >> 10]; - m = mid[(idx & 0x03e0) >> 5]; - f = final[idx & 0x001f]; - if (i == -1 || m == -1 || f == -1) - /* This is illegal. 
*/ - ch = L'\0'; - else if (i > 0 && m > 0) - ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00; - else if (i > 0 && m == 0 & f == 0) - ch = init_to_ucs[i - 1]; - else if (i == 0 && m > 0 & f == 0) - ch = 0x314e + m; /* 0x314f + m - 1 */ - else if (i == 0 && m == 0 & f > 0) - ch = final_to_ucs[f - 1]; /* round trip?? */ - else - /* This is illegal. */ - ch = L'\0'; - } - else - { - if (inchar2 < 0x31 - || (inchar2 > 0x7e && inchar2 < 0x91) - || inchar2 == 0xff) - /* This is illegal. */ - ch = L'\0'; - else if (inchar == 0xda - && inchar2 > 0xa0 && inchar2 < 0xd4) - /* This is illegal. */ - /* Modern Hangul Jaso is defined elsewhere - in Johab */ - ch = L'\0'; - else - { - ch = johab_sym_hanja_to_ucs (idx, inchar, - inchar2); - /* if (idx <= 0xdefe) - ch = ksc5601_sym_to_ucs[(inchar - 0xd9) * 192 - + inchar2 - - (inchar2>0x90 ? 0x43 : 0x31)]; - - else - ch = ksc5601_hanja_to_ucs[(inchar - 0xe0) *192 - + inchar2 - - (inchar2>0x90 ? 0x43 : 0x31)]; - */ - } - } - - if (ch == L'\0') - --cnt; - } - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - wchar_t ch = *((wchar_t *) (inbuf + cnt)); - unsigned char cp[2]; - /* - if (ch >= (sizeof (from_ucs4_lat1) - / sizeof (from_ucs4_lat1[0]))) - { - if (ch >= 0x0391 && ch <= 0x0451) - cp = from_ucs4_greek[ch - 0x391]; - else if (ch >= 0x2010 && ch <= 0x9fa0) - cp = from_ucs4_cjk[ch - 0x02010]; - else - break; - } - else - cp = from_ucs4_lat1[ch]; - */ - johab_from_ucs4 (ch, cp); - - if (cp[0] == '\0' && ch != 0) - /* Illegal character. 
*/ - break; - - outbuf[outchars] = cp[0]; - /* Now test for a possible second byte and write this - if possible. */ - if (cp[1] != '\0') - { - if (outchars + 1 >= data->outbufsize) - { - /* The result does not fit into the buffer. */ - extra = 1; - break; - } - outbuf[++outchars] = cp[1]; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_johab_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} +/* Definitions used in the body of the `gconv' function. 
*/ +#define CHARSET_NAME "JOHAB" +#define FROM_LOOP from_johab +#define TO_LOOP to_johab +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 + + +/* First define the conversion function from JOHAB to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + /* half-width Korean Currency WON sign \ + if (ch == 0x5c) \ + ch = 0x20a9; \ + else if (ch < 0x7f) \ + ch = (wchar_t) ch; \ + */ \ + if (ch < 0x7f) \ + /* Plain ASCII. */ \ + ++inptr; \ + /* Johab : 1. Hangul \ + 1st byte : 0x84-0xd3 \ + 2nd byte : 0x41-0x7e, 0x81-0xfe \ + 2. Hanja & Symbol : \ + 1st byte : 0xd8-0xde, 0xe0-0xf9 \ + 2nd byte : 0x31-0x7e, 0x91-0xfe \ + 0xd831-0xd87e and 0xd891-0xd8fe are user-defined area */ \ + else \ + { \ + if (ch > 0xf9 || ch == 0xdf || (ch > 0x7e && ch < 0x84) \ + || (ch > 0xd3 && ch < 0xd9)) \ + { \ + /* These are illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + /* Two-byte character. First test whether the next \ + character is also available. */ \ + uint32_t ch2; \ + uint_fast32_t idx; \ + \ + if (NEED_LENGTH_TEST && inptr + 1 >= inend) \ + { \ + /* The second character is not available. Store the \ + intermediate result. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + idx = ch * 256 + ch2; \ + if (ch <= 0xd3) \ + { \ + /* Hangul */ \ + uint_fast32_t i, m, f; \ + \ + i = init[(idx & 0x7c00) >> 10]; \ + m = mid[(idx & 0x03e0) >> 5]; \ + f = final[idx & 0x001f]; \ + \ + if (i == -1 || m == -1 || f == -1) \ + { \ + /* This is illegal. 
*/ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else if (i > 0 && m > 0) \ + ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00; \ + else if (i > 0 && m == 0 & f == 0) \ + ch = init_to_ucs[i - 1]; \ + else if (i == 0 && m > 0 & f == 0) \ + ch = 0x314e + m; /* 0x314f + m - 1 */ \ + else if (i == 0 && m == 0 & f > 0) \ + ch = final_to_ucs[f - 1]; /* round trip?? */ \ + else \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + } \ + else \ + { \ + if (ch2 < 0x31 || (ch2 > 0x7e && ch2 < 0x91) || ch2 == 0xff) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else if (ch == 0xda && ch2 > 0xa0 && ch2 < 0xd4) \ + { \ + /* This is illegal. Modern Hangul Jaso is defined \ + elsewhere in Johab */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + ch = johab_sym_hanja_to_ucs (idx, ch, ch2); \ + /* if (idx <= 0xdefe) \ + ch = __ksc5601_sym_to_ucs[(ch - 0xd9) * 192 \ + + ch2 - (ch2 > 0x90 \ + ? 0x43 : 0x31)]; \ + else \ + ch = __ksc5601_hanja_to_ucs[(ch - 0xe0) *192 \ + + ch2 - (ch2 > 0x90 \ + ?0x43 : 0x31)];\ + */ \ + } \ + } \ + } \ + \ + if (ch == 0) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 2; \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. 
*/ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + unsigned char cp[2]; \ + /* \ + if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0]))) \ + { \ + if (ch >= 0x0391 && ch <= 0x0451) \ + cp = from_ucs4_greek[ch - 0x391]; \ + else if (ch >= 0x2010 && ch <= 0x9fa0) \ + cp = from_ucs4_cjk[ch - 0x02010]; \ + else \ + break; \ + } \ + else \ + cp = from_ucs4_lat1[ch]; \ + */ \ + johab_from_ucs4 (ch, cp); \ + \ + if (cp[0] == '\0' && ch != 0) \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *outptr++ = cp[0]; \ + /* Now test for a possible second byte and write this if possible. */ \ + if (cp[1] != '\0') \ + { \ + if (NEED_LENGTH_TEST && outptr >= outend) \ + { \ + /* The result does not fit into the buffer. */ \ + --outptr; \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + *outptr++ = cp[1]; \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/koi-8.c b/iconvdata/koi-8.c index 2228456..b2332c2 100644 --- a/iconvdata/koi-8.c +++ b/iconvdata/koi-8.c @@ -1,5 +1,5 @@ /* Conversion from and to KOI-8. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Get the conversion table. */ +#include <stdint.h> #include <koi-8.h> -#define NAME "KOI-8" + +#define CHARSET_NAME "KOI-8" +#define HAS_HOLES 1 /* Not all 256 character are defined. 
*/ + #include <8bit-generic.c> diff --git a/iconvdata/koi8-r.c b/iconvdata/koi8-r.c index 502132b..ee317bd 100644 --- a/iconvdata/koi8-r.c +++ b/iconvdata/koi8-r.c @@ -1,5 +1,5 @@ /* Conversion from and to KOI8-R. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,12 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +#include <stdint.h> + +/* Specify the conversion table. */ #define TABLES <koi8-r.h> -#define NAME "KOI8-R" + +#define CHARSET_NAME "KOI8-R" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-gap.c> diff --git a/iconvdata/ksc5601.c b/iconvdata/ksc5601.c index c919425..da64c43 100644 --- a/iconvdata/ksc5601.c +++ b/iconvdata/ksc5601.c @@ -18,7 +18,7 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ -#include <wchar.h> +#include <stdint.h> #include "ksc5601.h" /* @@ -50,7 +50,7 @@ perl tab21.pl > ksc_hangul1.tb */ -const uint16_t ksc5601_hangul_to_ucs[KSC5601_HANGUL]= +const uint16_t __ksc5601_hangul_to_ucs[KSC5601_HANGUL]= { 0xac00, 0xac01, 0xac04, 0xac07, 0xac08, 0xac09, 0xac0a, 0xac10, 0xac11, 0xac12, 0xac13, 0xac14, 0xac15, 0xac16, 0xac17, 0xac19, @@ -369,7 +369,7 @@ grep -v '# HANGUL SYLLABLE' | perl tab11.pl > ksc_sym1.tb */ -const uint16_t ksc5601_sym_to_ucs[] = +const uint16_t __ksc5601_sym_to_ucs[] = { [0x0000] = 0x3000, [0x0001] = 0x3001, [0x0002] = 0x3002, [0x0003] = 0x00b7, [0x0004] = 0x2025, [0x0005] = 0x2026, [0x0006] = 0x00a8, [0x0007] = 0x3003, @@ -646,7 +646,7 @@ perl tab12.pl > ksc_sym2.tb */ -const uint16_t ksc5601_sym_from_ucs[KSC5601_SYMBOL][2] = +const uint16_t __ksc5601_sym_from_ucs[KSC5601_SYMBOL][2] = { {0x00a1, 0x222e}, {0x00a4, 0x2234}, {0x00a7, 0x2157}, {0x00a8, 0x2127}, {0x00aa, 0x2823}, {0x00ad, 0x2129}, {0x00b0, 0x2146}, {0x00b1, 0x213e}, @@ -914,7 +914,7 @@ perl tab21.pl > ksc_hanja1.tb printf ("\n"); */ -const uint16_t ksc5601_hanja_to_ucs[KSC5601_HANJA]= +const uint16_t __ksc5601_hanja_to_ucs[KSC5601_HANJA]= { 0x4f3d, 0x4f73, 0x5047, 0x50f9, 0x52a0, 0x53ef, 0x5475, 0x54e5, 0x5609, 0x5ac1, 0x5bb6, 0x6687, 0x67b6, 0x67b7, 0x67ef, 0x6b4c, @@ -1550,7 +1550,7 @@ awk '{print $2,$1}' | sort -u | perl tab12.pl > ksc_hanja2.tb */ -const uint16_t ksc5601_hanja_from_ucs[KSC5601_HANJA][2]= +const uint16_t __ksc5601_hanja_from_ucs[KSC5601_HANJA][2]= { {0x4e00, 0x6c69}, {0x4e01, 0x6f4b}, {0x4e03, 0x7652}, {0x4e07, 0x5832}, {0x4e08, 0x6d5b}, {0x4e09, 0x5f32}, {0x4e0a, 0x5f3e}, {0x4e0b, 0x793b}, diff --git a/iconvdata/ksc5601.h b/iconvdata/ksc5601.h index 0d86c7b..8a5c40e 100644 --- a/iconvdata/ksc5601.h +++ b/iconvdata/ksc5601.h @@ -28,18 +28,18 @@ #include <stdint.h> /* Conversion table. 
*/ -extern const uint16_t ksc5601_hangul_to_ucs[KSC5601_HANGUL]; -extern const uint16_t ksc5601_sym_to_ucs[]; -extern const uint16_t ksc5601_sym_from_ucs[KSC5601_SYMBOL][2]; -extern const uint16_t ksc5601_hanja_to_ucs[KSC5601_HANJA]; -extern const uint16_t ksc5601_hanja_from_ucs[KSC5601_HANJA][2]; +extern const uint16_t __ksc5601_hangul_to_ucs[KSC5601_HANGUL]; +extern const uint16_t __ksc5601_sym_to_ucs[]; +extern const uint16_t __ksc5601_sym_from_ucs[KSC5601_SYMBOL][2]; +extern const uint16_t __ksc5601_hanja_to_ucs[KSC5601_HANJA]; +extern const uint16_t __ksc5601_hanja_from_ucs[KSC5601_HANJA][2]; /* static inline wchar_t ksc5601_to_ucs4 (char **s, size_t avail) */ -static inline wchar_t +static inline uint32_t ksc5601_to_ucs4 (uint16_t s) { unsigned char ch = s / 256; @@ -61,23 +61,25 @@ ksc5601_to_ucs4 (uint16_t s) Hangul in KS C 5601 : row 16 - row 40 */ if (idx >= 1410 && idx < 3760) - return ksc5601_hangul_to_ucs[idx-1410]; + return __ksc5601_hangul_to_ucs[idx-1410]; else if (idx > 3854) /* Hanja : row 42 - row 93 : 3854 = 94 * (42-1) */ - return ksc5601_hanja_to_ucs[idx-3854]; + return __ksc5601_hanja_to_ucs[idx-3854]; else - return ksc5601_sym_to_ucs[idx] ?: UNKNOWN_10646_CHAR; + return __ksc5601_sym_to_ucs[idx] ?: UNKNOWN_10646_CHAR; } static inline size_t -ucs4_to_ksc5601_hangul (wchar_t wch, uint16_t *s) +ucs4_to_ksc5601_hangul (uint32_t wch, uint16_t *s) { - int l=0,m,u=KSC5601_HANGUL-1; - wchar_t try; + int l = 0; + int m; + int u = KSC5601_HANGUL - 1; + uint32_t try; while (l <= u) { - try = (wchar_t) ksc5601_hangul_to_ucs[m=(l+u)/2]; + try = (uint32_t) __ksc5601_hangul_to_ucs[m=(l+u)/2]; if (try > wch) u = m - 1; else if (try < wch) @@ -93,21 +95,24 @@ ucs4_to_ksc5601_hangul (wchar_t wch, uint16_t *s) static inline size_t -ucs4_to_ksc5601_hanja (wchar_t wch, uint16_t *s) +ucs4_to_ksc5601_hanja (uint32_t wch, uint16_t *s) { - int l=0,m,u=KSC5601_HANJA-1; - wchar_t try; + int l = 0; + int m; + int u = KSC5601_HANJA - 1; + uint32_t try; while (l <= u) { 
- try = (wchar_t) ksc5601_hanja_from_ucs[m=(l+u)/2][0]; + m = (l + u) / 2; + try = (uint32_t) __ksc5601_hanja_from_ucs[m][0]; if (try > wch) u=m-1; else if (try < wch) l = m + 1; else { - *s = ksc5601_hanja_from_ucs[m][1]; + *s = __ksc5601_hanja_from_ucs[m][1]; return 2; } } @@ -115,24 +120,24 @@ ucs4_to_ksc5601_hanja (wchar_t wch, uint16_t *s) } static inline size_t -ucs4_to_ksc5601_sym (wchar_t wch, uint16_t *s) +ucs4_to_ksc5601_sym (uint32_t wch, uint16_t *s) { int l = 0; int m; int u = KSC5601_SYMBOL - 1; - wchar_t try; + uint32_t try; while (l <= u) { m = (l + u) / 2; - try = ksc5601_sym_from_ucs[m][0]; + try = __ksc5601_sym_from_ucs[m][0]; if (try > wch) u = m - 1; else if (try < wch) l = m + 1; else { - *s = ksc5601_sym_from_ucs[m][1]; + *s = __ksc5601_sym_from_ucs[m][1]; return 2; } } @@ -146,13 +151,13 @@ ucs4_to_ksc5601 (wchar_t wch, char **s, size_t avail) */ static inline size_t -ucs4_to_ksc5601 (wchar_t ch, uint16_t *s) +ucs4_to_ksc5601 (uint32_t ch, uint16_t *s) { *s = (uint16_t) UNKNOWN_10646_CHAR; /* FIXIT */ if (ch >= 0xac00 && ch <= 0xd7a3) return ucs4_to_ksc5601_hangul (ch, s); - else if (ch >= 0x4e00 && ch <= 0x9fff || ch >= 0xf900 && ch <= 0xfa0b) + else if (ch >= 0x4e00 && ch <= 0x9fff || ch >= 0xf900 && ch <= 0xfa0b) return ucs4_to_ksc5601_hanja (ch, s); else return ucs4_to_ksc5601_sym (ch, s); diff --git a/iconvdata/latin-greek-1.c b/iconvdata/latin-greek-1.c index d62969a..37ccf09 100644 --- a/iconvdata/latin-greek-1.c +++ b/iconvdata/latin-greek-1.c @@ -1,5 +1,5 @@ /* Conversion from and to LATIN-GREEK-1. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,10 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Specify the conversion table. 
*/ #define TABLES <latin-greek-1.h> -#define NAME "LATIN-GREEK-1" + +#define CHARSET_NAME "LATIN-GREEK-1" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-gap.c> diff --git a/iconvdata/latin-greek.c b/iconvdata/latin-greek.c index 90270e0..8810f4a 100644 --- a/iconvdata/latin-greek.c +++ b/iconvdata/latin-greek.c @@ -1,5 +1,5 @@ /* Conversion from and to LATIN-GREEK. - Copyright (C) 1997 Free Software Foundation, Inc. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,7 +18,10 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <wchar.h> +/* Specify the conversion table. */ #define TABLES <latin-greek.h> -#define NAME "LATIN-GREEK" + +#define CHARSET_NAME "LATIN-GREEK" +#define HAS_HOLES 1 /* Not all 256 character are defined. */ + #include <8bit-gap.c> diff --git a/iconvdata/run-iconv-test.sh b/iconvdata/run-iconv-test.sh index ebafa2f..af6a136 100755 --- a/iconvdata/run-iconv-test.sh +++ b/iconvdata/run-iconv-test.sh @@ -52,10 +52,18 @@ while read from to targets; do { echo "*** conversion from $t to $to failed"; exit 1; } test -s $temp1 && cmp testdata/$from $temp2 >& /dev/null || { echo "*** $from -> t -> $to conversion failed"; exit 1; } + rm -f $temp1 $temp2 + + # Now test some bigger text, entirely in ASCII. + $ICONV -f $from -t $t testdata/suntzus | + $ICONV -f $t -t $to > $temp1 || + { echo "*** conversion $from->$t->$to of suntzus failed"; exit 1; } + cmp testdata/suntzus.txt $temp1 || + { echo "*** conversion $from->$t->$to of suntzus incorrect"; exit 1; } + rm -f $temp1 # All tests ok. 
echo "$from -> $t -> $to ok" - rm -f $temp1 $temp2 done done < TESTS diff --git a/iconvdata/sjis.c b/iconvdata/sjis.c index 33dc2f1..d65b905 100644 --- a/iconvdata/sjis.c +++ b/iconvdata/sjis.c @@ -18,12 +18,10 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> #include <stdint.h> -#include <string.h> #include <wchar.h> -static const wchar_t halfkana_to_ucs4[] = +static const uint32_t halfkana_to_ucs4[] = { 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff70, @@ -3981,268 +3979,151 @@ static const char from_ucs4_cjk[32657][2] = }; -/* Direction of the transformation. */ -static int to_sjis_object; -static int from_sjis_object; +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "SJIS" +#define FROM_LOOP from_sjis +#define TO_LOOP to_sjis +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 +/* First define the conversion function from SJIS to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + if (ch == 0x5c) \ + { \ + ch = 0xa5; \ + ++inptr; \ + } \ + else if (ch == 0x7e) \ + { \ + ch = 0x203e; \ + ++inptr; \ + } \ + else if (ch < 0x7e) \ + ++inptr; \ + else if (ch >= 0xa1 && ch <= 0xdf) \ + { \ + ch = halfkana_to_ucs4[ch - 0xa1]; \ + ++inptr; \ + } \ + else if (ch > 0xea || ch == 0xa0 || ch == 0x7f || ch == 0x80) \ + { \ + /* These are illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + /* Two-byte character. First test whether the next character \ + is also available. */ \ + uint32_t ch2; \ + uint_fast32_t idx; \ + \ + if (NEED_LENGTH_TEST && inptr + 1 >= inend) \ + { \ + /* The second character is not available. 
Store \ + the intermediate result. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + idx = ch * 256 + ch2; \ + if (idx < 0x8140 || (idx > 0x84be && idx < 0x889f) \ + || (idx > 0x89fc && idx < 0x9040) \ + || (idx > 0x9ffc && idx < 0xe040) || idx > 0xeaa4) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + /* We could pack the data a bit more dense. The second \ + byte will never be 0x7f and it will also be never \ + >0xfc. But this would mean yet more `if's. */ \ + if (idx <= 0x84be) \ + ch = cjk_block1[(ch - 0x81) * 192 + ch2 - 0x40]; \ + else if (idx <= 0x89fc) \ + ch = cjk_block2[(ch - 0x88) * 192 + ch2 - 0x9f]; \ + else if (idx <= 0x9ffc) \ + ch = cjk_block3[(ch - 0x90) * 192 + ch2 - 0x40]; \ + else \ + ch = cjk_block4[(ch - 0xe0) * 192 + ch2 - 0x40]; \ + } \ + \ + if (ch == 0) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "SJIS") != NULL) - step->data = &from_sjis_object; - else if (strcasestr (step->to_name, "SJIS") != NULL) - step->data = &to_sjis_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. 
*/ - if (data->is_last) - result = GCONV_OK; - else - { - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_sjis_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = inbuf[cnt]; - wchar_t ch; - - if (inchar == 0x5c) - ch = L'\xa5'; - else if (inchar == 0x7e) - ch = 0x203e; - else if (inchar < 0x7e) - ch = (wchar_t) inchar; - else if (inchar >= 0xa1 && inchar <= 0xdf) - ch = halfkana_to_ucs4[inchar - 0xa1]; - else if (inchar > 0xea || inchar == 0xa0 || inchar == 0x7f - || inchar == 0x80) - /* These are illegal. */ - ch = L'\0'; - else - { - /* Two-byte character. First test whether the next - character is also available. */ - int inchar2; - int idx; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = inbuf[++cnt]; - idx = inchar * 256 + inchar2; - if (idx < 0x8140 || (idx > 0x84be && idx < 0x889f) - || (idx > 0x89fc && idx < 0x9040) - || (idx > 0x9ffc && idx < 0xe040) || idx > 0xeaa4) - /* This is illegal. */ - ch = L'\0'; - else - { - /* We could pack the data a bit more dense. - The second byte will never be 0x7f and it - will also be never >0xfc. But this would - mean yet more `if's. 
*/ - if (idx <= 0x84be) - ch = cjk_block1[(inchar - 0x81) * 192 - + inchar2 - 0x40]; - else if (idx <= 0x89fc) - ch = cjk_block2[(inchar - 0x88) * 192 - + inchar2 - 0x9f]; - else if (idx <= 0x9ffc) - ch = cjk_block3[(inchar - 0x90) * 192 - + inchar2 - 0x40]; - else - ch = cjk_block4[(inchar - 0xe0) * 192 - + inchar2 - 0x40]; - } - - if (ch == L'\0') - --cnt; - } - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - int ch = *((wchar_t *) (inbuf + cnt)); - const char *cp; - - if (ch >= (sizeof (from_ucs4_lat1) - / sizeof (from_ucs4_lat1[0]))) - { - if (ch >= 0x0391 && ch <= 0x0451) - cp = from_ucs4_greek[ch - 0x391]; - else if (ch >= 0x2010 && ch <= 0x9fa0) - cp = from_ucs4_cjk[ch - 0x02010]; - else - /* Illegal character. */ - break; - } - else - cp = from_ucs4_lat1[ch]; - - if (cp[0] == '\0' && ch != 0) - /* Illegal character. */ - break; - - outbuf[outchars] = cp[0]; - /* Now test for a possible second byte and write this - if possible. */ - if (cp[1] != '\0') - { - if (outchars + 1 >= data->outbufsize) - { - /* The result does not fit into the buffer. */ - extra = 1; - break; - } - outbuf[++outchars] = cp[1]; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. 
*/ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_sjis_object - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + const char *cp; \ + \ + if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0]))) \ + { \ + if (ch >= 0x0391 && ch <= 0x0451) \ + cp = from_ucs4_greek[ch - 0x391]; \ + else if (ch >= 0x2010 && ch <= 0x9fa0) \ + cp = from_ucs4_cjk[ch - 0x02010]; \ + else \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + } \ + else \ + cp = from_ucs4_lat1[ch]; \ + \ + if (cp[0] == '\0' && ch != 0) \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *outptr++ = cp[0]; \ + /* Now test for a possible second byte and write this if possible. */ \ + if (cp[1] != '\0') \ + { \ + if (NEED_LENGTH_TEST && outptr >= outend) \ + { \ + /* The result does not fit into the buffer. 
*/ \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + *outptr++ = cp[1]; \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> - if (written != NULL && data->is_last) - *written = do_write; - return result; -} +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/t61.c b/iconvdata/t61.c index b77ee71..8c288ad 100644 --- a/iconvdata/t61.c +++ b/iconvdata/t61.c @@ -19,6 +19,7 @@ Boston, MA 02111-1307, USA. */ #include <gconv.h> +#include <stdint.h> #include <string.h> /* Data taken from the WG15 tables. */ @@ -362,248 +363,133 @@ static const char from_ucs4[][2] = */ }; -/* Direction of the transformation. */ -static int to_t61_object; -static int from_t61_object; +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "T.61" +#define FROM_LOOP from_t_61 +#define TO_LOOP to_t_61 +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 + +/* First define the conversion function from T.61 to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + \ + if (ch >= 0xc1 && ch <= 0xcf) \ + { \ + /* Composed character. First test whether the next character \ + is also available. */ \ + uint32_t ch2; \ + \ + if (NEED_LENGTH_TEST && inptr + 1 >= inend) \ + { \ + /* The second character is not available. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + \ + if (ch2 < 0x20 || ch2 >= 0x80) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ch = to_ucs4_comb[ch - 0xc1][ch2 - 0x20]; \ + \ + inptr += 2; \ + } \ + else \ + { \ + ch = to_ucs4[ch]; \ + ++inptr; \ + } \ + \ + if (ch == 0 && *inptr != '\0') \ + { \ + /* This is an illegal character. 
*/ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + char tmp[2]; \ + uint32_t ch = *((uint32_t *) inptr); \ + const char *cp; \ + \ + if (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0])) \ + { \ + if (ch == 0x2126) \ + cp = "\xe0"; \ + else if (ch == 0x2c7) \ + cp = "\xcf\x20"; \ + else if (ch < 0x2d8 || ch > 0x2dd) \ + { \ + /* Illegal characters. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + static const char map[5] = "\xc6\xc7\xca\xce\xcd"; \ + \ + tmp[0] = map[ch - 0x2d8]; \ + tmp[1] = ' '; \ + cp = tmp; \ + } \ + } \ + else \ + { \ + cp = from_ucs4[ch]; \ + \ + if (cp[0] == '\0' && ch != 0) \ + { \ + /* Illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + } \ + \ + *outptr++ = cp[0]; \ + /* Now test for a possible second byte and write this if possible. */ \ + if (cp[1] != '\0') \ + { \ + if (NEED_LENGTH_TEST && outptr >= outend) \ + { \ + /* The result does not fit into the buffer. */ \ + --outptr; \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ + *outptr++ = cp[1]; \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "T.61") != NULL) - step->data = &from_t61_object; - else if (strcasestr (step->to_name, "T.61") != NULL) - step->data = &to_t61_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. 
*/ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_t61_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = inbuf[cnt]; - wchar_t ch; - - if (inchar >= '\xc1' && inchar <= '\xcf') - { - /* Composed character. First test whether the next - character is also available. */ - int inchar2; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = inbuf[++cnt]; - - if (inchar2 < '\x20' || inchar2 >= '\x80') - /* This is illegal. */ - ch = L'\0'; - else - ch = to_ucs4_comb[inchar - 0xc1][inchar2 - 0x20]; - - if (ch == L'\0') - /* Undo the increment for illegal characters. */ - --cnt; - } - else - ch = to_ucs4[inchar]; - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. 
*/ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - char tmp[2]; - int ch = *((wchar_t *) (inbuf + cnt)); - const char *cp; - - if (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0])) - { - if (ch == 0x2126) - cp = "\xe0"; - else if (ch == 0x2c7) - cp = "\xcf\x20"; - else if (ch < 0x2d8 || ch > 0x2dd) - /* Illegal characters. */ - break; - else - { - static const char map[5] = "\xc6\xc7\xca\xce\xcd"; - - tmp[0] = map[ch - 0x2d8]; - tmp[1] = ' '; - cp = tmp; - } - } - else if (ch < 0 || (from_ucs4[ch][0] == '\0' && ch != 0)) - break; - else - cp = from_ucs4[ch]; - - outbuf[outchars] = cp[0]; - /* Now test for a possible second byte and write this - if possible. */ - if (cp[1] != '\0') - { - if (outchars + 1 >= data->outbufsize) - { - /* The result does not fit into the buffer. */ - extra = 1; - break; - } - outbuf[++outchars] = cp[1]; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_t61_object - ? 0 : sizeof (wchar_t) - 1) - ? 
GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - return result; -} +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/uhc.c b/iconvdata/uhc.c index ed4b7ad..f3addd4 100644 --- a/iconvdata/uhc.c +++ b/iconvdata/uhc.c @@ -18,16 +18,9 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <gconv.h> #include <stdint.h> -#include <string.h> -#include <wchar.h> #include <ksc5601.h> -/* Direction of the transformation. */ -static int to_uhc_object; -static int from_uhc_object; - /* egrep \ @@ -2576,20 +2569,20 @@ static const uint16_t uhc_hangul_from_ucs[11172]= 0xc64f, 0xc650, 0xc651, 0xc652 }; + static inline void -uhc_from_ucs4(wchar_t ch, unsigned char *cp) +uhc_from_ucs4 (uint32_t ch, unsigned char *cp) { if (ch >= 0x7f) { uint16_t idx=0; if (ch >= 0xac00 && ch <= 0xd7a3) - idx = uhc_hangul_from_ucs[(int) ch - 0xac00]; - else if (ch >= 0x4e00 && ch <= 0x9fa5 - || ch >= 0xf900 && ch <= 0xfa0b) + idx = uhc_hangul_from_ucs[ch - 0xac00]; + else if (ch >= 0x4e00 && ch <= 0x9fa5 || ch >= 0xf900 && ch <= 0xfa0b) { ucs4_to_ksc5601_hanja (ch,&idx); - idx |= (idx ? 0x8080 : 0); + idx |= (idx ? 
0x8080 : 0); } /* Half-width Korean Currency Won Sign else if (ch == 0x20a9) @@ -2598,286 +2591,169 @@ uhc_from_ucs4(wchar_t ch, unsigned char *cp) else { ucs4_to_ksc5601_sym (ch, &idx); - idx |= (idx ? 0x8080 : 0); + idx |= (idx ? 0x8080 : 0); } - *cp = (char) (idx / 256); - *(cp + 1) = (char) (idx & 0xff) ; + cp[0] = (unsigned char) (idx / 256); + cp[1] = (unsigned char) (idx & 0xff); } - /* think about 0x5c ; '\' */ + /* XXX Think about 0x5c ; '\'. */ else { - *cp = (char) (0x7f & ch) ; - *(cp + 1) = (char) 0; + cp[0] = (unsigned char) ch; + cp[1] = (unsigned char) 0; } } -int -gconv_init (struct gconv_step *step) -{ - /* Determine which direction. */ - if (strcasestr (step->from_name, "UHC") != NULL) - step->data = &from_uhc_object; - else if (strcasestr (step->to_name, "UHC") != NULL) - step->data = &to_uhc_object; - else - return GCONV_NOCONV; - - return GCONV_OK; -} - - -void -gconv_end (struct gconv_step *data) -{ - /* Nothing to do. */ -} - - -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. 
*/ - data->outbufavail = 0; - } - } - else - { - do_write = 0; - - do - { - result = GCONV_OK; - - if (step->data == &from_uhc_object) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - int inchar = (unsigned char) inbuf[cnt]; - wchar_t ch; -/* half-width Korean Currency WON sign - - if (inchar == 0x5c) - ch = 0x20a9; - else if (inchar <= 0x7f) - ch = (wchar_t) inchar; -*/ - if (inchar <= 0x7f) - ch = (wchar_t) inchar; - - - else if ( inchar <= 0x80 || inchar >= 0xfe || inchar == 0xc9) - /* This is illegal. */ - ch = L'\0'; - else - { - /* Two-byte character. First test whether the next - character is also available. */ - int inchar2; - - if (cnt + 1 >= inchars) - { - /* The second character is not available. Store - the intermediate result. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - inchar2 = (unsigned char) inbuf[++cnt]; - -/* - Additional code points not present in EUC-KR - - 1st byte 2nd byte - 0x81-0xa0 0x41-0x5a, 0x61-0x7a, 0x81-0xfe total - (32) (26) + (26) + (126) = 178 5696 - - 0xa1-0xc5 0x41-0x5a 0x61-0x7a 0x81-0xa0 - (37) (26) + (26) + (32) = 84 3108 - - 0xc6 0x41-0x52 - (1) (18) 18 - - 8822 - - - 8822(only in UHC) + 2350(both in EUC-KR and UHC) = 11,172 -*/ - - if ( inchar < 0xa1 || inchar2 < 0xa1) - if ( inchar > 0xc6 || inchar2 <0x41 || - inchar2 > 0x5a && inchar2 < 0x61 || - inchar2 > 0x7a && inchar2 < 0x81 || - inchar == 0xc6 && inchar2 > 0x52 ) - ch = L'0'; - else - { - ch = uhc_extra_to_ucs[ inchar2 - 0x41 - - ( inchar2 > 0x80 ? 12 : - ( inchar2 > 0x60 ? 6 : 0 ) ) - + ( inchar < 0xa1 ? 
- (inchar - 0x81) * 178 : - 5696 + (inchar - 0xa1) * 84 ) ] ; - } - - else - if ( ( ch = ksc5601_to_ucs4( - (uint16_t) (inchar * 256 + inchar2) & 0x7f7f) ) - == UNKNOWN_10646_CHAR ) - - ch = L'\0'; - - if (ch == L'\0') - --cnt; - } - - if (ch == L'\0' && inbuf[cnt] != '\0') - { - /* This is an illegal character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - *((wchar_t *) (outbuf + outwchars)) = ch; - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - int extra = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - wchar_t ch = *((wchar_t *) (inbuf + cnt)); - unsigned char cp[2]; - - uhc_from_ucs4(ch,cp) ; - - if (cp[0] == '\0' && ch != 0) - /* Illegal character. */ - break; - - outbuf[outchars] = cp[0]; - /* Now test for a possible second byte and write this - if possible. */ - if (cp[1] != '\0') - { - if (outchars + 1 >= data->outbufsize) - { - /* The result does not fit into the buffer. */ - extra = 1; - break; - } - outbuf[++outchars] = cp[1]; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars + extra < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (step->data == &from_uhc_object - ? 0 : sizeof (wchar_t) - 1) - ? 
GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "UHC" +#define FROM_LOOP from_uhc +#define TO_LOOP to_uhc +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 + +/* First define the conversion function from UHC to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = (uint32_t) *inptr; \ + \ +/* half-width Korean Currency WON sign \ + \ + if (ch == 0x5c) \ + ch = 0x20a9; \ + else if (ch <= 0x7f) \ + ch = (wchar_t) ch; \ +*/ \ + if (ch <= 0x7f) \ + ++inptr; \ + else if (ch <= 0x80 || ch >= 0xfe || ch == 0xc9) \ + { \ + /* This is illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + else \ + { \ + /* Two-byte character. First test whether the next character \ + is also available. */ \ + uint32_t ch2; \ + \ + if (NEED_LENGTH_TEST && inptr + 1 >= inend) \ + { \ + /* The second character is not available. Store \ + the intermediate result. 
*/ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + \ +/* \ + Additional code points not present in EUC-KR \ + \ + 1st byte 2nd byte \ + 0x81-0xa0 0x41-0x5a, 0x61-0x7a, 0x81-0xfe total \ + (32) (26) + (26) + (126) = 178 5696 \ + \ + 0xa1-0xc5 0x41-0x5a 0x61-0x7a 0x81-0xa0 \ + (37) (26) + (26) + (32) = 84 3108 \ + \ + 0xc6 0x41-0x52 \ + (1) (18) 18 \ + \ + 8822 \ + \ + 8822(only in UHC) + 2350(both in EUC-KR and UHC) = 11,172 \ +*/ \ + \ + if (ch < 0xa1 || ch2 < 0xa1) \ + { \ + if (ch > 0xc6 || ch2 <0x41 || (ch2 > 0x5a && ch2 < 0x61) \ + || (ch2 > 0x7a && ch2 < 0x81) || (ch == 0xc6 && ch2 > 0x52)) \ + { \ + /* This is not legal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ch = uhc_extra_to_ucs[ch2 - 0x41 \ + - (ch2 > 0x80 ? 12 : (ch2 > 0x60 ? 6 : 0)) \ + + (ch < 0xa1 \ + ? (ch - 0x81) * 178 \ + : 5696 + (ch - 0xa1) * 84)]; \ + } \ + else \ + { \ + ch = ksc5601_to_ucs4 ((ch * 256 + ch2) & 0x7f7f); \ + \ + if (ch == UNKNOWN_10646_CHAR) \ + { \ + /* Illegal. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + } \ + \ + if (ch == 0) \ + { \ + /* This is an illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 2; \ + } \ + \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + uint32_t ch = *((uint32_t *) inptr); \ + unsigned char cp[2]; \ + \ + uhc_from_ucs4 (ch, cp); \ + \ + if (cp[0] == '\0' && ch != 0) \ + { \ + /* Illegal character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + *outptr++ = cp[0]; \ + /* Now test for a possible second byte and write this if possible. */ \ + if (cp[1] != '\0') \ + { \ + if (NEED_LENGTH_TEST && outptr >= outend) \ + { \ + /* The result does not fit into the buffer. 
*/ \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ + *outptr++ = cp[1]; \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> |