diff options
Diffstat (limited to 'iconv')
-rw-r--r-- | iconv/Makefile | 10 | ||||
-rw-r--r-- | iconv/gconv.c | 53 | ||||
-rw-r--r-- | iconv/gconv.h | 14 | ||||
-rw-r--r-- | iconv/gconv_builtin.c | 17 | ||||
-rw-r--r-- | iconv/gconv_builtin.h | 21 | ||||
-rw-r--r-- | iconv/gconv_conf.c | 4 | ||||
-rw-r--r-- | iconv/gconv_db.c | 8 | ||||
-rw-r--r-- | iconv/gconv_int.h | 13 | ||||
-rw-r--r-- | iconv/gconv_open.c | 17 | ||||
-rw-r--r-- | iconv/gconv_simple.c | 1220 | ||||
-rw-r--r-- | iconv/iconv.c | 19 | ||||
-rw-r--r-- | iconv/iconv_prog.c | 9 | ||||
-rw-r--r-- | iconv/loop.c | 226 | ||||
-rw-r--r-- | iconv/skeleton.c | 328 |
14 files changed, 997 insertions, 962 deletions
diff --git a/iconv/Makefile b/iconv/Makefile index 783b1d5..e4cd0fc 100644 --- a/iconv/Makefile +++ b/iconv/Makefile @@ -21,10 +21,18 @@ # subdir := iconv +include ../Makeconfig + headers = iconv.h gconv.h routines = iconv_open iconv iconv_close \ gconv_open gconv gconv_close gconv_db gconv_conf \ - gconv_dl gconv_builtin gconv_simple + gconv_builtin gconv_simple +ifeq ($(elf),yes) +routines += gconv_dl +else +CFLAGS-gconv_db.c = -DSTATIC_GCONV +endif + distribute = gconv_builtin.h gconv_int.h others = iconv_prog diff --git a/iconv/gconv.c b/iconv/gconv.c index f8b7c80..aa58bdb 100644 --- a/iconv/gconv.c +++ b/iconv/gconv.c @@ -19,39 +19,58 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <assert.h> #include <gconv.h> +#include <sys/param.h> int internal_function -__gconv (gconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, - size_t *outbytesleft, size_t *converted) +__gconv (gconv_t cd, const char **inbuf, const char *inbufend, char **outbuf, + char *outbufend, size_t *converted) { size_t last_step = cd->nsteps - 1; - size_t oldinbytes = *inbytesleft; int result; if (cd == (gconv_t) -1L) return GCONV_ILLEGAL_DESCRIPTOR; - cd->data[last_step].outbuf = outbuf ? *outbuf : NULL; - cd->data[last_step].outbufavail = 0; - cd->data[last_step].outbufsize = *outbytesleft; + assert (converted != NULL); + *converted = 0; - if (converted != NULL) - *converted = 0; + if (inbuf == NULL || *inbuf == NULL) + /* We just flush. */ + result = (*cd->steps->fct) (cd->steps, cd->data, NULL, NULL, converted, 1); + else + { + const char *last_start; - result = (*cd->steps->fct) (cd->steps, cd->data, - inbuf ? 
*inbuf : NULL, inbytesleft, - converted, inbuf == NULL || *inbuf == NULL); + assert (outbuf != NULL && *outbuf != NULL); + cd->data[last_step].outbuf = *outbuf; + cd->data[last_step].outbufend = outbufend; - if (inbuf != NULL && *inbuf != NULL) - *inbuf += oldinbytes - *inbytesleft; - if (outbuf != NULL && *outbuf != NULL) - { - *outbuf += cd->data[last_step].outbufavail; - *outbytesleft -= cd->data[last_step].outbufavail; + do + { + /* See whether the input size is reasoable for the output + size. If not adjust it. */ + size_t inlen = ((inbufend - *inbuf) / cd->steps->max_needed_from + * cd->steps->max_needed_from); + + if (cd->nsteps > 1) + inlen = MIN (inlen, (((outbufend - cd->data[last_step].outbuf) + / cd->steps[last_step].max_needed_to) + * cd->steps[last_step].max_needed_to)); + + last_start = *inbuf; + result = (*cd->steps->fct) (cd->steps, cd->data, inbuf, + *inbuf + inlen, converted, 0); + } + while (result == GCONV_EMPTY_INPUT && last_start != *inbuf + && *inbuf + cd->steps->min_needed_from <= inbufend); } + if (outbuf != NULL && *outbuf != NULL) + *outbuf = cd->data[last_step].outbuf; + return result; } diff --git a/iconv/gconv.h b/iconv/gconv.h index f3f80f4..cd0e3de 100644 --- a/iconv/gconv.h +++ b/iconv/gconv.h @@ -57,8 +57,8 @@ struct gconv_loaded_object; /* Type of a conversion function. */ typedef int (*gconv_fct) __P ((struct gconv_step *, - struct gconv_step_data *, - __const char *, size_t *, size_t *, int)); + struct gconv_step_data *, __const char **, + __const char *, size_t *, int)); /* Constructor and destructor for local data for conversion step. */ typedef int (*gconv_init_fct) __P ((struct gconv_step *)); @@ -80,6 +80,13 @@ struct gconv_step gconv_init_fct init_fct; gconv_end_fct end_fct; + /* Information about the number of bytes needed or produced in this + step. This helps optimizing the buffer sizes. 
*/ + int min_needed_from; + int max_needed_from; + int min_needed_to; + int max_needed_to; + void *data; /* Pointer to step-local data. */ }; @@ -88,8 +95,7 @@ struct gconv_step struct gconv_step_data { char *outbuf; /* Output buffer for this step. */ - size_t outbufavail; /* Bytes already available in output buffer. */ - size_t outbufsize; /* Size of output buffer. */ + char *outbufend; /* Address of first byte after the output buffer. */ int is_last; diff --git a/iconv/gconv_builtin.c b/iconv/gconv_builtin.c index 6b14804..a970fcc 100644 --- a/iconv/gconv_builtin.c +++ b/iconv/gconv_builtin.c @@ -33,15 +33,25 @@ static struct builtin_map gconv_init_fct init; gconv_end_fct end; + int min_needed_from; + int max_needed_from; + int min_needed_to; + int max_needed_to; + } map[] = { #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \ - Fct, Init, End) \ + Fct, Init, End, MinF, MaxF, MinT, MaxT) \ { \ name: Name, \ fct: Fct, \ init: Init, \ end: End, \ + \ + min_needed_from: MinF, \ + max_needed_from: MaxF, \ + min_needed_to: MinT, \ + max_needed_to: MaxT \ }, #define BUILTIN_ALIAS(From, To) @@ -66,4 +76,9 @@ __gconv_get_builtin_trans (const char *name, struct gconv_step *step) step->end_fct = map[cnt].end; step->counter = INT_MAX; step->shlib_handle = NULL; + + step->min_needed_from = map[cnt].min_needed_from; + step->max_needed_from = map[cnt].max_needed_from; + step->min_needed_to = map[cnt].min_needed_to; + step->max_needed_to = map[cnt].max_needed_to; } diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h index 265dca1..3d214ff 100644 --- a/iconv/gconv_builtin.h +++ b/iconv/gconv_builtin.h @@ -26,10 +26,12 @@ BUILTIN_ALIAS ("10646-1:1993/UCS4/", "ISO-10646/UCS4/") BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4", - __gconv_transform_internal_ucs4, NULL, NULL) + __gconv_transform_internal_ucs4, NULL, NULL, + 4, 4, 4, 4) BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, "INTERNAL", 1, 
"=ucs4->INTERNAL", - __gconv_transform_internal_ucs4, NULL, NULL) + __gconv_transform_internal_ucs4, NULL, NULL, + 4, 4, 4, 4) /* Please note that we need only one function for both direction. */ BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/") @@ -37,22 +39,27 @@ BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/") BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "ISO-10646/UTF8/", 1, "=INTERNAL->utf8", - __gconv_transform_internal_utf8, NULL, NULL) + __gconv_transform_internal_utf8, NULL, NULL, + 4, 4, 1, 6) BUILTIN_TRANSFORMATION ("ISO-10646/UTF-?8/", "ISO-10646/UTF", 13, "INTERNAL", 1, "=utf8->INTERNAL", - __gconv_transform_utf8_internal, NULL, NULL) + __gconv_transform_utf8_internal, NULL, NULL, + 1, 6, 4, 4) BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/") BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/") BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "INTERNAL", 1, "=ucs2->INTERNAL", - __gconv_transform_ucs2_internal, NULL, NULL) + __gconv_transform_ucs2_internal, NULL, NULL, + 2, 2, 4, 4) BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "ISO-10646/UCS2/", 1, "=INTERNAL->ucs2", - __gconv_transform_internal_ucs2, NULL, NULL) + __gconv_transform_internal_ucs2, NULL, NULL, + 4, 4, 2, 2) BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy", - __gconv_transform_dummy, NULL, NULL) + __gconv_transform_dummy, NULL, NULL, + 1, 1, 1, 1) diff --git a/iconv/gconv_conf.c b/iconv/gconv_conf.c index c67a0d8..ae5ba19 100644 --- a/iconv/gconv_conf.c +++ b/iconv/gconv_conf.c @@ -47,7 +47,7 @@ static const char gconv_module_ext[] = MODULE_EXT; static struct gconv_module builtin_modules[] = { #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \ - Fct, Init, End) \ + Fct, Init, End, MinF, MaxF, MinT, MaxT) \ { \ from_pattern: From, \ from_constpfx: ConstPfx, \ @@ -69,7 +69,7 @@ static const char * builtin_aliases[] = { #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \ - Fct, Init, End) + Fct, Init, End, MinF, MaxF, MinT, MaxT) #define 
BUILTIN_ALIAS(From, To) From " " To, #include "gconv_builtin.h" diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c index 9f4366b..62d8f05 100644 --- a/iconv/gconv_db.c +++ b/iconv/gconv_db.c @@ -192,6 +192,7 @@ gen_steps (struct derivation_step *best, const char *toset, ? __strdup (current->result_set) : result[step_cnt + 1].from_name); +#ifndef STATIC_GCONV if (current->code->module_name[0] == '/') { /* Load the module, return handle for it. */ @@ -212,6 +213,7 @@ gen_steps (struct derivation_step *best, const char *toset, result[step_cnt].end_fct = shlib_handle->end_fct; } else +#endif /* It's a builtin transformation. */ __gconv_get_builtin_trans (current->code->module_name, &result[step_cnt]); @@ -230,7 +232,9 @@ gen_steps (struct derivation_step *best, const char *toset, { if (result[step_cnt].end_fct != NULL) (*result[step_cnt].end_fct) (&result[step_cnt]); +#ifndef STATIC_GCONV __gconv_release_shlib (result[step_cnt].shlib_handle); +#endif } free (result); *nsteps = 0; @@ -525,6 +529,7 @@ __gconv_find_transform (const char *toset, const char *fromset, result = find_derivation (toset, toset_expand, fromset, fromset_expand, handle, nsteps); +#ifndef STATIC_GCONV /* Increment the user counter. */ if (result == GCONV_OK) { @@ -548,6 +553,7 @@ __gconv_find_transform (const char *toset, const char *fromset, } while (cnt > 0); } +#endif /* Release the lock. */ __libc_lock_unlock (lock); @@ -568,6 +574,7 @@ __gconv_close_transform (struct gconv_step *steps, size_t nsteps) { int result = GCONV_OK; +#ifndef STATIC_GCONV /* Acquire the lock. */ __libc_lock_lock (lock); @@ -583,6 +590,7 @@ __gconv_close_transform (struct gconv_step *steps, size_t nsteps) /* Release the lock. */ __libc_lock_unlock (lock); +#endif return result; } diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index a1475f8..86e892f 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -34,8 +34,8 @@ struct gconv_alias }; -/* Default size of intermediate buffers. 
*/ -#define GCONV_DEFAULT_BUFSIZE 8160 +/* How many character should be conveted in one call? */ +#define GCONV_NCHAR_GOAL 8160 /* Structure describing one loaded shared object. This normally are @@ -99,9 +99,8 @@ extern int __gconv_close (gconv_t cd) according to rules described by CD and place up to *OUTBYTESLEFT bytes in buffer starting at *OUTBUF. Return number of written characters in *CONVERTED if this pointer is not null. */ -extern int __gconv (gconv_t __cd, const char **__inbuf, size_t *__inbytesleft, - char **__outbuf, size_t *__outbytesleft, - size_t *__converted) +extern int __gconv (gconv_t __cd, const char **__inbuf, const char *inbufend, + char **__outbuf, char *outbufend, size_t *__converted) internal_function; /* Return in *HANDLE a pointer to an array with *NSTEPS elements describing @@ -149,8 +148,8 @@ extern void __gconv_get_builtin_trans (const char *__name, #ifdef _LIBC # define __BUILTIN_TRANS(Name) \ extern int Name (struct gconv_step *__step, struct gconv_step_data *__data, \ - const char *__inbuf, size_t *__inlen, size_t *__written, \ - int __do_flush) + const char **__inbuf, const char *__inbufend, \ + size_t *__written, int __do_flush) __BUILTIN_TRANS (__gconv_transform_dummy); __BUILTIN_TRANS (__gconv_transform_ascii_internal); diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c index d82dcfe..831794f 100644 --- a/iconv/gconv_open.c +++ b/iconv/gconv_open.c @@ -62,21 +62,24 @@ __gconv_open (const char *toset, const char *fromset, gconv_t *handle) for (cnt = 0; cnt < nsteps; ++cnt) { /* If this is the last step we must not allocate an output - buffer. Signal this to the initializer. */ + buffer. */ data[cnt].is_last = cnt == nsteps - 1; /* We use the `mbstate_t' member in DATA. */ data[cnt].statep = &data[cnt].__state; /* Allocate the buffer. 
*/ - data[cnt].outbufsize = GCONV_DEFAULT_BUFSIZE; - data[cnt].outbuf = (char *) malloc (data[cnt].outbufsize); - if (data[cnt].outbuf == NULL) + if (!data[cnt].is_last) { - res = GCONV_NOMEM; - break; + data[cnt].outbuf = + (char *) malloc (GCONV_NCHAR_GOAL + * steps[cnt].max_needed_to); + if (data[cnt].outbuf == NULL) + { + res = GCONV_NOMEM; + break; + } } - data[cnt].outbufavail = 0; } } } diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index b72e61e..f2fec12 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -35,7 +35,7 @@ /* These are definitions used by some of the functions for handling UTF-8 encoding below. */ -static const wchar_t encoding_mask[] = +static const uint32_t encoding_mask[] = { ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff }; @@ -49,8 +49,8 @@ static const unsigned char encoding_byte[] = int __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inlen, size_t *written, - int do_flush) + const char **inbuf, const char *inbufend, + size_t *written, int do_flush) { size_t do_write; @@ -60,12 +60,12 @@ __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data, do_write = 0; else { - do_write = MIN (*inlen, data->outbufsize - data->outbufavail); + do_write = MIN (inbufend - *inbuf, data->outbufend - data->outbuf); memcpy (data->outbuf, inbuf, do_write); - *inlen -= do_write; - data->outbufavail += do_write; + *inbuf -= do_write; + *data->outbuf += do_write; } /* ### TODO Actually, this number must be devided according to the @@ -83,934 +83,330 @@ __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data, format is, if any, the endianess. 
The Unicode/ISO 10646 says that unless some higher protocol specifies it differently, the byte order is big endian.*/ -int -__gconv_transform_internal_ucs4 (struct gconv_step *step, - struct gconv_step_data *data, - const char *inbuf, size_t *inlen, - size_t *written, int do_flush) +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs4_loop +#define TO_LOOP internal_ucs4_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs4 + + +static inline int +internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + mbstate_t *state, void *data, size_t *converted) { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write = 0; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - /* Clear the state. */ - memset (data->statep, '\0', sizeof (mbstate_t)); - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - int save_errno = errno; - - result = GCONV_OK; - do - { - size_t n_convert = (MIN (*inlen, - (data->outbufsize - data->outbufavail)) - / sizeof (wchar_t)); - #if __BYTE_ORDER == __LITTLE_ENDIAN - /* Sigh, we have to do some real work. 
*/ - wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail]; - size_t cnt; + /* Sigh, we have to do some real work. */ + size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt) - outbuf[cnt] = bswap_32 (((wchar_t *) inbuf)[cnt]); + for (cnt = 0; cnt < n_convert; ++cnt) + *((uint32_t *) outptr)++ = bswap_32 (*((uint32_t *) inptr)++); + *inptrp = inptr; + *outptrp = outptr; #elif __BYTE_ORDER == __BIG_ENDIAN - /* Simply copy the data. */ - memcpy (&data->outbuf[data->outbufsize], inbuf, - n_convert * sizeof (wchar_t)); + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); #else # error "This endianess is not supported." #endif - *inlen -= n_convert * sizeof (wchar_t); - inbuf += n_convert * sizeof (wchar_t); - data->outbufavail += n_convert * sizeof (wchar_t); - do_write += n_convert; - - if (*inlen > 0 && *inlen < sizeof (wchar_t)) - { - /* We have an incomplete character at the end. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - if (data->is_last) - { - /* This is the last step. */ - result = (*inlen < sizeof (wchar_t) - ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inlen >= sizeof (wchar_t) && result == GCONV_EMPTY_INPUT); - - __set_errno (save_errno); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} - - -/* Convert from ISO 646-IRV to the internal (UCS4-like) format. 
*/ -int -__gconv_transform_ascii_internal (struct gconv_step *step, - struct gconv_step_data *data, - const char *inbuf, size_t *inlen, - size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write = 0; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - /* Clear the state. */ - memset (data->statep, '\0', sizeof (mbstate_t)); - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - const unsigned char *newinbuf = inbuf; - int save_errno = errno; - - result = GCONV_OK; - do - { - size_t actually = 0; - size_t cnt = 0; - - while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize - && cnt < *inlen) - { - if (*newinbuf > '\x7f') - { - /* This is no correct ANSI_X3.4-1968 character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - /* It's an one byte sequence. */ - *(wchar_t *) &data->outbuf[data->outbufavail] - = (wchar_t) *newinbuf; - data->outbufavail += sizeof (wchar_t); - ++actually; - - ++newinbuf; - ++cnt; - } - - /* Remember how much we converted. */ - do_write += cnt * sizeof (wchar_t); - *inlen -= cnt; - - /* Check whether an illegal character appeared. */ - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. 
*/ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inlen > 0 && result == GCONV_EMPTY_INPUT); - - __set_errno (save_errno); - } - - if (written != NULL && data->is_last) - *written = do_write / sizeof (wchar_t); - - return result; -} - - -/* Convert from ISO 10646/UCS to ISO 646-IRV. */ -int -__gconv_transform_internal_ascii (struct gconv_step *step, - struct gconv_step_data *data, - const char *inbuf, size_t *inlen, - size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - /* Clear the state. */ - memset (data->statep, '\0', sizeof (mbstate_t)); - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - const wchar_t *newinbuf = (const wchar_t *) inbuf; - int save_errno = errno; - do_write = 0; - - result = GCONV_OK; - do - { - size_t actually = 0; - size_t cnt = 0; - - while (data->outbufavail < data->outbufsize - && cnt + 3 < *inlen) - { - if (*newinbuf < L'\0' || *newinbuf > L'\x7f') - { - /* This is no correct ANSI_X3.4-1968 character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - /* It's an one byte sequence. 
*/ - data->outbuf[data->outbufavail++] = (char) *newinbuf; - ++actually; - - ++newinbuf; - cnt += sizeof (wchar_t); - } - - /* Remember how much we converted. */ - do_write += cnt / sizeof (wchar_t); - *inlen -= cnt; - - /* Check whether an illegal character appeared. */ - if (result != GCONV_OK) - break; - - /* Check for incomplete input. */ - if (*inlen > 0 && *inlen < sizeof (wchar_t)) - { - /* We have an incomplete character at the end. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - if (data->is_last) - { - /* This is the last step. */ - result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT; - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inlen > 0 && result == GCONV_EMPTY_INPUT); - - __set_errno (save_errno); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} - - -int -__gconv_transform_internal_utf8 (struct gconv_step *step, - struct gconv_step_data *data, - const char *inbuf, size_t *inlen, - size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - /* Clear the state. */ - memset (data->statep, '\0', sizeof (mbstate_t)); - do_write = 0; - - /* Call the steps down the chain if there are any. 
*/ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. */ - data->outbufavail = 0; - } - } - else - { - const wchar_t *newinbuf = (const wchar_t *) inbuf; - int save_errno = errno; - do_write = 0; - - result = GCONV_OK; - do - { - size_t cnt = 0; - - while (data->outbufavail < data->outbufsize - && cnt * sizeof (wchar_t) + 3 < *inlen) - { - wchar_t wc = newinbuf[cnt]; - - if (wc < 0 && wc > 0x7fffffff) - { - /* This is no correct ISO 10646 character. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - if (wc < 0x80) - /* It's an one byte sequence. */ - data->outbuf[data->outbufavail++] = (char) wc; - else - { - size_t step; - size_t start; - - for (step = 2; step < 6; ++step) - if ((wc & encoding_mask[step - 2]) == 0) - break; - - if (data->outbufavail + step >= data->outbufsize) - /* Too long. */ - break; - - start = data->outbufavail; - data->outbufavail += step; - data->outbuf[start] = encoding_byte[step - 2]; - --step; - do - { - data->outbuf[start + step] = 0x80 | (wc & 0x3f); - wc >>= 6; - } - while (--step > 0); - data->outbuf[start] |= wc; - } - - ++cnt; - } - - /* Remember how much we converted. */ - do_write += cnt; - *inlen -= cnt * sizeof (wchar_t); - newinbuf += cnt; - - /* Check whether an illegal character appeared. */ - if (result != GCONV_OK) - break; - - /* Check for incomplete input. */ - if (*inlen > 0 && *inlen < sizeof (wchar_t)) - { - /* We have an incomplete character at the end. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - if (data->is_last) - { - /* This is the last step. */ - result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT; - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. 
*/ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inlen > 0 && result == GCONV_EMPTY_INPUT); - - __set_errno (save_errno); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} - - -int -__gconv_transform_utf8_internal (struct gconv_step *step, - struct gconv_step_data *data, - const char *inbuf, size_t *inlen, - size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - /* Clear the state. */ - memset (data->statep, '\0', sizeof (mbstate_t)); - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - } - } + /* Determine the status. */ + if (*outptrp == outend) + result = GCONV_FULL_OUTPUT; + else if (*inptrp == inend) + result = GCONV_EMPTY_INPUT; else - { - int save_errno = errno; - int extra = 0; - do_write = 0; - - result = GCONV_OK; - do - { - wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail]; - size_t cnt = 0; - size_t actually = 0; - - while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize - && cnt < *inlen) - { - size_t start = cnt; - wchar_t value; - unsigned char byte; - int count; - - /* Next input byte. 
*/ - byte = inbuf[cnt++]; - - if (byte < 0x80) - { - /* One byte sequence. */ - count = 0; - value = byte; - } - else if ((byte & 0xe0) == 0xc0) - { - count = 1; - value = byte & 0x1f; - } - else if ((byte & 0xf0) == 0xe0) - { - /* We expect three bytes. */ - count = 2; - value = byte & 0x0f; - } - else if ((byte & 0xf8) == 0xf0) - { - /* We expect four bytes. */ - count = 3; - value = byte & 0x07; - } - else if ((byte & 0xfc) == 0xf8) - { - /* We expect five bytes. */ - count = 4; - value = byte & 0x03; - } - else if ((byte & 0xfe) == 0xfc) - { - /* We expect six bytes. */ - count = 5; - value = byte & 0x01; - } - else - { - /* This is an illegal encoding. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - if (cnt + count > *inlen) - { - /* We don't have enough input. */ - --cnt; - extra = count; - break; - } - - /* Read the possible remaining bytes. */ - while (count > 0) - { - byte = inbuf[cnt++]; - --count; - - if ((byte & 0xc0) != 0x80) - { - /* This is an illegal encoding. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - - value <<= 6; - value |= byte & 0x3f; - } - - if (result != GCONV_OK) - { - cnt = start; - break; - } - - *outbuf++ = value; - ++actually; - } - - /* Remember how much we converted. */ - do_write += actually; - *inlen -= cnt; - inbuf += cnt; - - data->outbufavail += actually * sizeof (wchar_t); - - /* Check whether an illegal character appeared. */ - if (result != GCONV_OK) - { - result = GCONV_ILLEGAL_INPUT; - break; - } - - if (*inlen > 0 && *inlen < extra) - { - /* We have an incomplete character at the end. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - if (data->is_last) - { - /* This is the last step. */ - result = (data->outbufavail + sizeof (wchar_t) > data->outbufsize - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. 
*/ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inlen > 0 && result == GCONV_EMPTY_INPUT); - - __set_errno (save_errno); - } + result = GCONV_INCOMPLETE_INPUT; - if (written != NULL && data->is_last) - *written = do_write; + if (converted != NULL) + converted += n_convert; return result; } +#include <iconv/skeleton.c> -int -__gconv_transform_ucs2_internal (struct gconv_step *step, - struct gconv_step_data *data, - const char *inbuf, size_t *inlen, - size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - /* Clear the state. */ - memset (data->statep, '\0', sizeof (mbstate_t)); - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - } - } - else - { - const uint16_t *newinbuf = (const uint16_t *) inbuf; - int save_errno = errno; - do_write = 0; - - do - { - wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail]; - size_t actually = 0; - - errno = 0; - while (data->outbufavail + 4 <= data->outbufsize - && *inlen >= 2) - { +/* Convert from ISO 646-IRV to the internal (UCS4-like) format. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ascii_internal_loop +#define TO_LOOP ascii_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ascii_internal + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (*inptr > '\x7f') \ + { \ + /* This is no correct ANSI_X3.4-1968 character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* It's an one byte sequence. */ \ + *((uint32_t *) outptr)++ = *inptr++; \ + } +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 1 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ascii_loop +#define TO_LOOP internal_ascii_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ascii + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (*((uint32_t *) inptr) > '\x7f') \ + { \ + /* This is no correct ANSI_X3.4-1968 character. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* It's an one byte sequence. */ \ + *outptr++ = *((uint32_t *) inptr)++; \ + } +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UTF-8. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 1 +#define MAX_NEEDED_TO 6 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_utf8_loop +#define TO_LOOP internal_utf8_loop /* This is not used. 
*/ +#define FUNCTION_NAME __gconv_transform_internal_utf8 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t wc = *((uint32_t *) inptr); \ + \ + /* Since we control every character we read this cannot happen. */ \ + assert (wc <= 0x7fffffff); \ + \ + if (wc < 0x80) \ + /* It's an one byte sequence. */ \ + *outptr++ = (unsigned char) wc; \ + else \ + { \ + size_t step; \ + char *start; \ + \ + for (step = 2; step < 6; ++step) \ + if ((wc & encoding_mask[step - 2]) == 0) \ + break; \ + \ + if (outptr + step >= outend) \ + { \ + /* Too long. */ \ + result = GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ + start = outptr; \ + *outptr = encoding_byte[step - 2]; \ + outptr += step; \ + --step; \ + do \ + { \ + start[step] = 0x80 | (wc & 0x3f); \ + wc >>= 6; \ + } \ + while (--step > 0); \ + start[0] |= wc; \ + } \ + \ + inptr += 4; \ + } +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UTF-8 to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 6 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP utf8_internal_loop +#define TO_LOOP utf8_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_utf8_internal + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch; \ + uint_fast32_t cnt; \ + uint_fast32_t i; \ + \ + /* Next input byte. */ \ + ch = *inptr; \ + \ + if (ch < 0x80) \ + /* One byte sequence. */ \ + cnt = 1; \ + else if ((ch & 0xe0) == 0xc0) \ + { \ + cnt = 2; \ + ch &= 0x1f; \ + } \ + else if ((ch & 0xf0) == 0xe0) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ + else if ((ch & 0xf8) == 0xf0) \ + { \ + /* We expect four bytes. 
*/ \ + cnt = 4; \ + ch &= 0x07; \ + } \ + else if ((ch & 0xfc) == 0xf8) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ + else if ((ch & 0xfe) == 0xfc) \ + { \ + /* We expect six bytes. */ \ + cnt = 6; \ + ch &= 0x01; \ + } \ + else \ + { \ + /* This is an illegal encoding. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + if (NEED_LENGTH_TEST && inptr + cnt >= inend) \ + { \ + /* We don't have enough input. */ \ + result = GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + /* Read the possible remaining bytes. */ \ + for (i = 1; i < cnt; ++i) \ + { \ + uint32_t byte = inptr[i]; \ + \ + if ((byte & 0xc0) != 0x80) \ + { \ + /* This is an illegal encoding. */ \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ch <<= 6; \ + ch |= byte & 0x3f; \ + } \ + \ + /* Now adjust the pointers and store the result. */ \ + inptr += cnt; \ + *((uint32_t *) outptr)++ = ch; \ + } +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UCS2 to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs2_internal_loop +#define TO_LOOP ucs2_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ucs2_internal + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP #if __BYTE_ORDER == __LITTLE_ENDIAN - outbuf[actually++] = (wchar_t) bswap_16 (*newinbuf++); +# define BODY \ + *((uint32_t *) outptr)++ = bswap_16 (*((uint16_t *) inptr)++); #else - outbuf[actually++] = (wchar_t) *newinbuf++; +# define BODY \ + *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++; #endif - data->outbufavail += 4; - *inlen -= 2; - } - - /* Remember how much we converted. */ - do_write += actually * sizeof (wchar_t); - - if (*inlen == 1) - { - /* We have an incomplete character at the end. 
*/ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - /* Check whether an illegal character appeared. */ - if (errno != 0) - { - result = GCONV_ILLEGAL_INPUT; - break; - } - - if (data->is_last) - { - /* This is the last step. */ - result = (data->outbufavail + sizeof (wchar_t) > data->outbufsize - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inlen > 0 && result == GCONV_EMPTY_INPUT); - - __set_errno (save_errno); - } - - if (written != NULL && data->is_last) - *written = do_write; - - return result; -} - - -int -__gconv_transform_internal_ucs2 (struct gconv_step *step, - struct gconv_step_data *data, - const char *inbuf, size_t *inlen, - size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - /* Clear the state. */ - memset (data->statep, '\0', sizeof (mbstate_t)); - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. 
*/ - data->outbufavail = 0; - } - } - else - { - const wchar_t *newinbuf = (const wchar_t *) inbuf; - int save_errno = errno; - do_write = 0; - - do - { - uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail]; - size_t actually = 0; - - errno = 0; - - while (data->outbufavail + 2 <= data->outbufsize - && *inlen >= 4) - { - if (*newinbuf >= 0x10000) - { - __set_errno (EILSEQ); - break; - } +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UCS2. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs2_loop +#define TO_LOOP internal_ucs2_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs2 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP #if __BYTE_ORDER == __LITTLE_ENDIAN - /* Please note that we use the `uint32_t' pointer as a - `uint16_t' pointer which works since we are on a - little endian machine. */ - outbuf[actually++] = bswap_16 (*((uint16_t *) newinbuf)); - ++newinbuf; +# define BODY \ + { \ + if (*((uint32_t *) inptr) >= 0x10000) \ + { \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + /* Please note that we use the `uint32_t' from-pointer as an `uint16_t' \ + pointer which works since we are on a little endian machine. */ \ + *((uint16_t *) outptr)++ = bswap_16 (*((uint16_t *) inptr)); \ + inptr += 4; \ + } #else - outbuf[actually++] = *newinbuf++; +# define BODY \ + { \ + if (*((uint32_t *) inptr) >= 0x10000) \ + { \ + result = GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \ + } #endif - *inlen -= 4; - data->outbufavail += 2; - } - - /* Remember how much we converted. */ - do_write += (const char *) newinbuf - inbuf; - - if (*inlen > 0 && *inlen < 4) - { - /* We have an incomplete input character. 
*/ - result = GCONV_INCOMPLETE_INPUT; - break; - } - - /* Check whether an illegal character appeared. */ - if (errno != 0) - { - result = GCONV_ILLEGAL_INPUT; - break; - } - - if (data->is_last) - { - /* This is the last step. */ - result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT; - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - { - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - } - while (*inlen > 0 && result == GCONV_EMPTY_INPUT); - - __set_errno (save_errno); - } - - if (written != NULL && data->is_last) - *written = do_write / sizeof (wchar_t); - - return result; -} +#include <iconv/loop.c> +#include <iconv/skeleton.c> diff --git a/iconv/iconv.c b/iconv/iconv.c index fc0ed41..2f57295 100644 --- a/iconv/iconv.c +++ b/iconv/iconv.c @@ -32,10 +32,27 @@ iconv (iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { gconv_t gcd = (gconv_t) cd; + char *outstart = outbuf ? 
*outbuf : NULL; size_t converted; int result; - result = __gconv (gcd, inbuf, inbytesleft, outbuf, outbytesleft, &converted); + if (inbuf == NULL || *inbuf == NULL) + { + result = __gconv (gcd, NULL, NULL, outbuf, outstart + *outbytesleft, + &converted); + } + else + { + const char *instart = *inbuf; + + result = __gconv (gcd, inbuf, *inbuf + *inbytesleft, outbuf, + *outbuf + *outbytesleft, &converted); + + *inbytesleft -= *inbuf - instart; + } + if (outstart != NULL) + *outbytesleft -= *outbuf - outstart; + switch (result) { case GCONV_ILLEGAL_DESCRIPTOR: diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 569bd3b..2452a88 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -299,12 +299,15 @@ process_block (iconv_t cd, const char *addr, size_t len, FILE *output) { #define OUTBUF_SIZE 32768 char outbuf[OUTBUF_SIZE]; - char *outptr = outbuf; - size_t outlen = OUTBUF_SIZE; + char *outptr; + size_t outlen; + size_t n; while (len > 0) { - size_t n = iconv (cd, &addr, &len, &outptr, &outlen); + outptr = outbuf; + outlen = OUTBUF_SIZE; + n = iconv (cd, &addr, &len, &outptr, &outlen); if (outptr != outbuf) { diff --git a/iconv/loop.c b/iconv/loop.c new file mode 100644 index 0000000..b8657d5 --- /dev/null +++ b/iconv/loop.c @@ -0,0 +1,226 @@ +/* Conversion loop frame work. + Copyright (C) 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* This file provides a frame for the reader loop in all conversion modules. + The actual code must (of course) be provided in the actual module source + code but certain actions can be written down generically, with some + customization options which are these: + + MIN_NEEDED_INPUT minimal number of input bytes needed for the next + conversion. + MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round + of conversion. + + MAX_NEEDED_INPUT you guessed it, this is the maximal number of input + bytes needed. It defaults to MIN_NEEDED_INPUT. + MAX_NEEDED_OUTPUT likewise for output bytes. + + The MIN_NEEDED_* values default to 1; the MAX_NEEDED_* values default to the corresponding MIN_NEEDED_* value. + + LOOPFCT name of the function created. If not specified + the name is `loop' but this prevents the use + of multiple functions in the same file. + + COUNT_CONVERTED optional macro which is used to count the actual + number of characters converted. For some conversions + it is easy to compute the value afterwards, but for + others explicit counting is cheaper. + + BODY this is supposed to expand to the body of the loop. + The user must provide this. +*/ + +#include <gconv.h> +#include <sys/param.h> /* For MIN. */ +#define __need_size_t +#include <stddef.h> + + +/* We need at least one byte for the next round. */ +#ifndef MIN_NEEDED_INPUT +# define MIN_NEEDED_INPUT 1 +#endif + +/* Let's see how many bytes we need at most. */ +#ifndef MAX_NEEDED_INPUT +# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT +#endif + +/* We produce at least one byte in the next round. */ +#ifndef MIN_NEEDED_OUTPUT +# define MIN_NEEDED_OUTPUT 1 +#endif + +/* Let's see how many bytes we produce. */ +#ifndef MAX_NEEDED_OUTPUT +# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT +#endif + +/* Default name for the function.
*/ +#ifndef LOOPFCT +# define LOOPFCT loop +#endif + +/* Make sure we have a loop body. */ +#ifndef BODY +# error "Definition of BODY missing for function" LOOPFCT +#endif + +/* We can calculate the number of converted characters easily if one + of the character sets has a fixed width. */ +#ifndef COUNT_CONVERTED +# if MIN_NEEDED_INPUT == MAX_NEEDED_INPUT +# if MIN_NEEDED_OUTPUT == MAX_NEEDED_OUTPUT +/* Decide whether one of the charsets has size 1. */ +# if MIN_NEEDED_INPUT == 1 +# define COUNT_CONVERTED (inptr - *inptrp) +# elif MIN_NEEDED_OUTPUT == 1 +# define COUNT_CONVERTED (outptr - *outptrp) +# else +/* Else we should see whether one of the two numbers is a power of 2. */ +# define COUNT_CONVERTED \ + ((MIN_NEEDED_INPUT & (-MIN_NEEDED_INPUT)) == MIN_NEEDED_INPUT \ + ? (inptr - *inptrp) : (outptr - *outptrp)) +# endif +# else +# define COUNT_CONVERTED (inptr - *inptrp) +# endif +# elif MIN_NEEDED_OUTPUT == MAX_NEEDED_OUTPUT +# define COUNT_CONVERTED (outptr - *outptrp) +# endif +#endif + + +/* The function returns the status, as defined in gconv.h. */ +static inline int +LOOPFCT (const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, mbstate_t *state, + void *data, size_t *converted) +{ + int result = GCONV_OK; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; +#ifndef COUNT_CONVERTED + size_t done = 0; +#endif + + /* We run one loop where we avoid checks for underflow/overflow of the + buffers to speed up the conversion a bit. */ + size_t min_in_rounds = (inend - inptr) / MAX_NEEDED_INPUT; + size_t min_out_rounds = (outend - outptr) / MAX_NEEDED_OUTPUT; + size_t min_rounds = MIN (min_in_rounds, min_out_rounds); + +#undef NEED_LENGTH_TEST +#define NEED_LENGTH_TEST 0 + while (min_rounds-- > 0) + { + /* Here comes the body the user provides. 
It can stop with RESULT + set to GCONV_INCOMPLETE_INPUT (if the input characters + vary in size), GCONV_ILLEGAL_INPUT, or GCONV_FULL_OUTPUT (if the + output characters vary in size). */ + BODY + + /* If necessary count the successful conversion. */ +#ifndef COUNT_CONVERTED + ++done; +#endif + } + + if (result == GCONV_OK) + { +#if MIN_NEEDED_INPUT == MAX_NEEDED_INPUT \ + && MIN_NEEDED_OUTPUT == MAX_NEEDED_OUTPUT + /* We don't need to start another loop since we were able to determine + the maximal number of characters to copy in advance. What remains + to be determined is the status. */ + if (inptr == inend) + /* No more input. */ + result = GCONV_EMPTY_INPUT; + else if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) + || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) + /* Overflow in the output buffer. */ + result = GCONV_FULL_OUTPUT; + else + /* We have something left in the input buffer. */ + result = GCONV_INCOMPLETE_INPUT; +#else + result = GCONV_EMPTY_INPUT; + +# undef NEED_LENGTH_TEST +# define NEED_LENGTH_TEST 1 + while (inptr != inend) + { + /* The `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are there to help the + compiler generate better code. They will be optimized away + since MIN_NEEDED_OUTPUT is always a constant. */ + if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) + || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) + { + /* Overflow in the output buffer. */ + result = GCONV_FULL_OUTPUT; + break; + } + if (MIN_NEEDED_INPUT > 1 && inptr + MIN_NEEDED_INPUT > inend) + { + /* We don't have enough input for another complete input + character. */ + result = GCONV_INCOMPLETE_INPUT; + break; + } + + /* Here comes the body the user provides. It can stop with + RESULT set to GCONV_INCOMPLETE_INPUT (if the + input characters vary in size), GCONV_ILLEGAL_INPUT, or + GCONV_FULL_OUTPUT (if the output characters vary in size). */ + BODY + + /* If necessary count the successful conversion.
*/ +# ifndef COUNT_CONVERTED + ++done; +# endif + } +#endif /* Input and output charset are not both fixed width. */ + } + + /* Add the number of characters we actually converted. */ +#ifdef COUNT_CONVERTED + *converted += COUNT_CONVERTED; +#else + *converted += done; +#endif + + /* Update the pointers pointed to by the parameters. */ + *inptrp = inptr; + *outptrp = outptr; + + return result; +} + + +/* We remove the macro definitions so that we can include this file again + for the definition of another function. */ +#undef MIN_NEEDED_INPUT +#undef MAX_NEEDED_INPUT +#undef MIN_NEEDED_OUTPUT +#undef MAX_NEEDED_OUTPUT +#undef LOOPFCT +#undef COUNT_CONVERTED +#undef BODY +#undef LOOPFCT diff --git a/iconv/skeleton.c b/iconv/skeleton.c new file mode 100644 index 0000000..3582f14 --- /dev/null +++ b/iconv/skeleton.c @@ -0,0 +1,328 @@ +/* Skeleton for a converison module. + Copyright (C) 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* This file can be included to provide definitions of several things + many modules have in common. 
It can be customized using the following + macros: + + DEFINE_INIT define the default initializer. This requires the + following symbol to be defined. + + CHARSET_NAME string with official name of the coded character + set (in all-caps) + + DEFINE_FINI define the default destructor function. + + MIN_NEEDED_FROM minimal number of bytes needed for the from-charset. + MIN_NEEDED_TO likewise for the to-charset. + + MAX_NEEDED_FROM maximal number of bytes needed for the from-charset. + This macro is optional, it defaults to MIN_NEEDED_FROM. + MAX_NEEDED_TO likewise for the to-charset. + + DEFINE_DIRECTION_OBJECTS + two objects will be defined to be used when the + `gconv' function must only distinguish two + directions. This is implied by DEFINE_INIT. + If this macro is not defined the following + macro must be available. + + FROM_DIRECTION this macro is supposed to return a value != 0 + if we convert from the current character set, + otherwise it returns 0. + + EMIT_SHIFT_TO_INIT this symbol is optional. If it is defined it + defines some code which writes out a sequence + of characters which brings the current state into + the initial state. + + FROM_LOOP name of the function implementing the conversion + from the current character set. + TO_LOOP likewise for the other direction. + + SAVE_RESET_STATE in case of an error we must reset the state for + the rerun so this macro must be defined for + stateful encodings. It takes an argument which + is nonzero when saving. + + RESET_INPUT_BUFFER If the input character sets allow this the macro + can be defined to reset the input buffer pointers + to cover only those characters up to the error. + + FUNCTION_NAME if not set the conversion function is named `gconv'. + */ + +#include <assert.h> +#include <gconv.h> +#include <string.h> +#define __need_size_t +#define __need_NULL +#include <stddef.h> + + +/* The direction objects.
*/ +#if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT +static int from_object; +static int to_object; + +# ifndef FROM_DIRECTION +# define FROM_DIRECTION step->data == &from_object +# endif +#else +# ifndef FROM_DIRECTION +# error "FROM_DIRECTION must be provided if direction objects are not used" +# endif +#endif + + +/* How many bytes are needed at most for the from-charset. */ +#ifndef MAX_NEEDED_FROM +# define MAX_NEEDED_FROM MIN_NEEDED_FROM +#endif + +/* Same for the to-charset. */ +#ifndef MAX_NEEDED_TO +# define MAX_NEEDED_TO MIN_NEEDED_TO +#endif + + +/* For conversions from one fixed width character set to another fixed width + character set we can define RESET_INPUT_BUFFER if necessary. */ +#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE +# if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO +/* We have to use these `if's here since the compiler cannot know that + (outbuf - outerr) is always divisible by MIN_NEEDED_TO. */ +# define RESET_INPUT_BUFFER \ + if (MIN_NEEDED_FROM % MIN_NEEDED_TO == 0) \ + *inbuf -= (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO); \ + else if (MIN_NEEDED_TO % MIN_NEEDED_FROM == 0) \ + *inbuf -= (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM); \ + else \ + *inbuf -= ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM +# endif +#endif + + +/* The default init function. It simply matches the name and initializes + the step data to point to one of the objects above. */ +#if DEFINE_INIT +# ifndef CHARSET_NAME +# error "CHARSET_NAME not defined" +# endif + +int +gconv_init (struct gconv_step *step) +{ + /* Determine which direction.
*/ + if (__strcasestr (step->from_name, CHARSET_NAME) != NULL) + step->data = &from_object; + else if (__strcasestr (step->to_name, CHARSET_NAME) != NULL) + step->data = &to_object; + else + return GCONV_NOCONV; + + step->min_needed_from = MIN_NEEDED_FROM; + step->max_needed_from = MAX_NEEDED_FROM; + step->min_needed_to = MIN_NEEDED_TO; + step->max_needed_to = MAX_NEEDED_TO; + + return GCONV_OK; +} +#endif + + +/* The default destructor function does nothing at the moment and so + we do not define it at all. But we still provide the macro just in case + we need it some day. */ +#if DEFINE_FINI +#endif + + +/* This is the actual conversion function. */ +#ifndef FUNCTION_NAME +# define FUNCTION_NAME gconv +#endif + +int +FUNCTION_NAME (struct gconv_step *step, struct gconv_step_data *data, + const char **inbuf, const char *inbufend, size_t *written, + int do_flush) +{ + struct gconv_step *next_step = step + 1; + struct gconv_step_data *next_data = data + 1; + gconv_fct fct = next_step->fct; + int status; + + /* If the function is called with no input this means we have to reset + to the initial state. The possibly partly converted input is + dropped. */ + if (do_flush) + { + /* Call the steps down the chain if there are any. */ + if (data->is_last) + status = GCONV_OK; + else + { +#ifdef EMIT_SHIFT_TO_INIT + status = GCONV_OK; + + EMIT_SHIFT_TO_INIT; + + if (status == GCONV_OK) +#endif + /* Give the modules below the same chance. */ + status = (*fct) (next_step, next_data, NULL, NULL, written, 1); + } + } + else + { + /* This variable is used to count the number of characters we + actually converted. */ + size_t converted = 0; + + /* We preserve the initial values of the pointer variables. */ + const char *inptr = *inbuf; + char *outbuf = data->outbuf; + char *outend = data->outbufend; + char *outptr; + + do + { + /* Remember the start value for this round. */ + inptr = *inbuf; + /* The outbuf buffer is empty. */ + outptr = outbuf; + + /* Save the state.
*/ +#ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (1); +#endif + + if (FROM_DIRECTION) + /* Run the conversion loop. */ + status = FROM_LOOP ((const unsigned char **) inbuf, + (const unsigned char *) inbufend, + (unsigned char **) &outbuf, + (unsigned char *) outend, + data->statep, step->data, &converted); + else + /* Run the conversion loop. */ + status = TO_LOOP ((const unsigned char **) inbuf, + (const unsigned char *) inbufend, + (unsigned char **) &outbuf, + (unsigned char *) outend, + data->statep, step->data, &converted); + + /* If this is the last step leave the loop, there is nothing + else we can do. */ + if (data->is_last) + { + /* Store information about how many bytes are available. */ + data->outbuf = outbuf; + break; + } + + /* Write out all output which was produced. */ + if (outbuf > outptr) + { + const char *outerr = outbuf; + int result; + + result = (*fct) (next_step, next_data, &outerr, outbuf, + written, 0); + + if (result != GCONV_EMPTY_INPUT) + { + if (outerr != outbuf) + { +#ifdef RESET_INPUT_BUFFER + RESET_INPUT_BUFFER; +#else + /* We have a problem in one of the functions + below. Undo the conversion up to the error point. */ + size_t nstatus; + + /* Reload the pointers. */ + *inbuf = inptr; + outbuf = outptr; + + /* Reset the state. */ +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (0); +# endif + + if (FROM_DIRECTION) + /* Run the conversion loop. */ + nstatus = FROM_LOOP ((const unsigned char **) inbuf, + (const unsigned char *) inbufend, + (unsigned char **) &outbuf, + (unsigned char *) outerr, + data->statep, step->data, + &converted); + else + /* Run the conversion loop. */ + nstatus = TO_LOOP ((const unsigned char **) inbuf, + (const unsigned char *) inbufend, + (unsigned char **) &outbuf, + (unsigned char *) outerr, + data->statep, step->data, + &converted); + + /* We must run out of output buffer space in this + rerun.
*/ + assert (nstatus == GCONV_FULL_OUTPUT + && outbuf == outerr); +#endif /* reset input buffer */ + } + + /* Change the status. */ + status = result; + } + else + /* All the output is consumed, we can make another run + if everything was ok. */ + if (status == GCONV_FULL_OUTPUT) + status = GCONV_OK; + } + } + while (status == GCONV_OK); + + /* Remember how many characters we converted. */ + *written += converted; + } + + return status; +} + +#undef DEFINE_INIT +#undef CHARSET_NAME +#undef DEFINE_FINI +#undef MIN_NEEDED_FROM +#undef MIN_NEEDED_TO +#undef MAX_NEEDED_FROM +#undef MAX_NEEDED_TO +#undef DEFINE_DIRECTION_OBJECTS +#undef FROM_DIRECTION +#undef EMIT_SHIFT_TO_INIT +#undef FROM_LOOP +#undef TO_LOOP +#undef RESET_STATE +#undef RESET_INPUT_BUFFER +#undef FUNCTION_NAME |