diff options
Diffstat (limited to 'iconvdata')
-rw-r--r-- | iconvdata/Makefile | 2 | ||||
-rw-r--r-- | iconvdata/TESTS | 2 | ||||
-rw-r--r-- | iconvdata/iso646.c | 332 | ||||
-rw-r--r-- | iconvdata/testdata/ANSI_X3.4-1968 | 6 | ||||
-rw-r--r-- | iconvdata/testdata/BS_4730 | 6 | ||||
-rw-r--r-- | iconvdata/testdata/BS_4730..UTF8 | 6 |
6 files changed, 157 insertions, 197 deletions
diff --git a/iconvdata/Makefile b/iconvdata/Makefile index c590ab2..d8fda78 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -27,7 +27,7 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \ T.61 ISO_6937 SJIS KOI-8 KOI8-R LATIN-GREEK LATIN-GREEK-1 \ HP-ROMAN8 EBCDIC-AT-DE EBCDIC-AT-DE-A EBCDIC-CA-FR \ EUC-KR UHC JOHAB libJIS libKSC BIG5 EUC-JP libGB \ - EUC-CN libCNS EUC-TW # ISO646 + EUC-CN libCNS EUC-TW ISO646 modules.so := $(addsuffix .so, $(modules)) diff --git a/iconvdata/TESTS b/iconvdata/TESTS index c37381e..b4c9505 100644 --- a/iconvdata/TESTS +++ b/iconvdata/TESTS @@ -39,3 +39,5 @@ ISO-8859-7 ISO-8859-7 Y UTF8 ISO-8859-8 ISO-8859-8 Y UTF8 ISO-8859-9 ISO-8859-9 Y UTF8 ISO-8859-10 ISO-8859-10 Y UCS2 UTF8 +ANSI_X3.4-1968 ANSI_X3.4-1968 Y UTF8 +BS_4730 BS_4730 Y UTF8 diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c index 3c40c8f..4b7c2bb 100644 --- a/iconvdata/iso646.c +++ b/iconvdata/iso646.c @@ -32,9 +32,23 @@ proofs to be necessary. */ #include <gconv.h> -#include <stdlib.h> +#include <stdint.h> #include <string.h> +/* Definitions used in the body of the `gconv' function. */ +#define FROM_LOOP from_ascii +#define TO_LOOP to_ascii +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION dir == from_iso646 +#define PREPARE_LOOP \ + enum direction dir = ((struct iso646_data *) step->data)->dir; \ + enum variant var = ((struct iso646_data *) step->data)->var; +#define EXTRA_LOOP_ARGS , var + + /* Direction of the transformation. */ enum direction { @@ -66,22 +80,22 @@ gconv_init (struct gconv_step *step) enum variant var; int result; - if (strcasestr (step->from_name, "ANSI_X3.4-1968") != NULL) + if (__strcasestr (step->from_name, "ANSI_X3.4-1968") != NULL) { dir = from_iso646; var = US; } - else if (strcasestr (step->from_name, "BS_4730") != NULL) + else if (__strcasestr (step->from_name, "BS_4730") != NULL) { dir = from_iso646; var = GB; } - else if (strcasestr (step->to_name, "ANSI_X3.4-1968") != NULL) + else if (__strcasestr (step->to_name, "ANSI_X3.4-1968") != NULL) { dir = to_iso646; var = US; } - else if (strcasestr (step->to_name, "BS_4730") != NULL) + else if (__strcasestr (step->to_name, "BS_4730") != NULL) { dir = to_iso646; var = GB; @@ -104,6 +118,13 @@ gconv_init (struct gconv_step *step) result = GCONV_OK; } + step->min_needed_from = MIN_NEEDED_FROM; + step->max_needed_from = MIN_NEEDED_FROM; + step->min_needed_to = MIN_NEEDED_TO; + step->max_needed_to = MIN_NEEDED_TO; + + step->stateful = 0; + return result; } @@ -115,194 +136,113 @@ gconv_end (struct gconv_step *data) } -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - enum direction dir = ((struct iso646_data *) step->data)->dir; - enum variant var = ((struct iso646_data *) step->data)->var; - - do_write = 0; - - do - { - result = GCONV_OK; - - if (dir == from_iso646) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - switch ((unsigned char) inbuf[cnt]) - { - case '\x23': - if (var == GB) - *((wchar_t *) (outbuf + outwchars)) = 0xa3; - else - *((wchar_t *) (outbuf + outwchars)) = 0x23; - break; - case '\x75': - if (var == GB) - *((wchar_t *) (outbuf + outwchars)) = 0x203e; - else - *((wchar_t *) (outbuf + outwchars)) = 0x75; - break; - default: - *((wchar_t *) (outbuf + outwchars)) = - (unsigned char) inbuf[cnt]; - case '\x80' ... '\xff': - /* Illegal character. */ - result = GCONV_ILLEGAL_INPUT; - goto out_from; - } - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - out_from: - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - unsigned char *outbuf = data->outbuf; - size_t cnt = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - switch (*((wchar_t *) (inbuf + cnt))) - { - case 0x23: - if (var == GB) - goto out_to; - outbuf[outchars] = 0x23; - break; - case 0x75: - if (var == GB) - goto out_to; - outbuf[outchars] = 0x75; - break; - case 0xa3: - if (var != GB) - goto out_to; - outbuf[outchars] = 0x23; - break; - case 0x203e: - if (var != GB) - goto out_to; - outbuf[outchars] = 0x75; - break; - default: - if (*((wchar_t *) (inbuf + cnt)) > 0x7f) - goto out_to; - outbuf[outchars] = - (unsigned char) *((wchar_t *) (inbuf + cnt)); - break; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - out_to: - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outchars; - - if (outchars < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (dir == from_iso646 - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written += do_write; - - return result; -} +/* First define the conversion function from ASCII to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch; \ + int failure = GCONV_OK; \ + \ + switch (*inptr) \ + { \ + case '\x23': \ + if (var == GB) \ + ch = 0xa3; \ + else \ + ch = 0x23; \ + break; \ + case '\x7e': \ + if (var == GB) \ + ch = 0x203e; \ + else \ + ch = 0x7e; \ + break; \ + default: \ + ch = *inptr; \ + break; \ + case '\x80' ... '\xff': \ + /* Illegal character. */ \ + failure = GCONV_ILLEGAL_INPUT; \ + ch = '\0'; /* OK, gcc, here I initialize the variable. */ \ + break; \ + } \ + \ + /* Hopefully gcc can recognize that the following `if' is only true \ + when we reach the default case in the `switch' statement. */ \ + if (failure == GCONV_ILLEGAL_INPUT) \ + { \ + /* Exit the loop with an error. */ \ + result = failure; \ + break; \ + } \ + *((uint32_t *) outptr)++ = ch; \ + ++inptr; \ + } +#define EXTRA_LOOP_DECLS , enum variant var +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + unsigned char ch; \ + int failure = GCONV_OK; \ + \ + do \ + { \ + switch (*((uint32_t *) inptr)) \ + { \ + case 0x23: \ + if (var == GB) \ + break; \ + ch = 0x23; \ + continue; \ + case 0x7e: \ + if (var == GB) \ + break; \ + ch = 0x7e; \ + continue; \ + case 0xa3: \ + if (var != GB) \ + break; \ + ch = 0x23; \ + continue; \ + case 0x203e: \ + if (var != GB) \ + break; \ + ch = 0x7e; \ + continue; \ + default: \ + if (*((uint32_t *) inptr) > 0x7f) \ + break; \ + ch = (unsigned char) *((uint32_t *) inptr); \ + continue; \ + } \ + /* When we come to this place we saw an illegal character. */ \ + failure = GCONV_ILLEGAL_INPUT; \ + ch = '\0'; /* OK, gcc, here I initialize the variable. */ \ + } \ + while (0); \ + \ + /* Hopefully gcc can recognize that the following `if' is only true \ + when we fall through the `switch' statement. */ \ + if (failure == GCONV_ILLEGAL_INPUT) \ + { \ + /* Exit the loop with an error. */ \ + result = failure; \ + break; \ + } \ + *outptr++ = ch; \ + inptr += 4; \ + } +#define EXTRA_LOOP_DECLS , enum variant var +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/testdata/ANSI_X3.4-1968 b/iconvdata/testdata/ANSI_X3.4-1968 new file mode 100644 index 0000000..7b7da5f --- /dev/null +++ b/iconvdata/testdata/ANSI_X3.4-1968 @@ -0,0 +1,6 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ diff --git a/iconvdata/testdata/BS_4730 b/iconvdata/testdata/BS_4730 new file mode 100644 index 0000000..7b7da5f --- /dev/null +++ b/iconvdata/testdata/BS_4730 @@ -0,0 +1,6 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ diff --git a/iconvdata/testdata/BS_4730..UTF8 b/iconvdata/testdata/BS_4730..UTF8 new file mode 100644 index 0000000..0dc3ff3 --- /dev/null +++ b/iconvdata/testdata/BS_4730..UTF8 @@ -0,0 +1,6 @@ + ! " £ $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ‾ |