From 55985355ade2a038b567dd9b58153a98384ae703 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 12 Jun 2000 19:47:50 +0000 Subject: Update. 2000-06-12 Ulrich Drepper * Rules (%.out): Define GCONV_PATH in the environment. * assert/Depend: New file. * iconvdata/Depend: New file. * intl/Depend: New file. * timezone/Makefile (build-testdata): Add GCONV_PATH to environment. * intl/tst-gettext.sh: Likewise. * iconv/Makefile (routines): Add gconv_trans. * iconv/gconv_trans.c: New file. * iconv/gconv.h (struct __gconv_trans_data): New type. (__gconv_fct): New parameter with starting position in output buffer. (__gconv_trans_fct, __gconv_trans_context_fct, __gconv_trans_query_fct, __gconv_trans_init_fct, __gconv_trans_end_fct): New types. (struct __gconv_step): Add new member __trans. * iconv/gconv_int.h: Pretty print prototypes. (gconv_transliterate): New prototype. (__BUILTIN_TRANS): Update for new conversion function interface. * iconv/gconv.c (__gconv): Pass new parameter to conversion function. * iconv/gconv_open.c (__gconv_open): Recognize error handling suffix in names, find appropriate function, and install in the conversion steps it can be used. * iconv/skeleton.c: Add additional parameter for beginning of output buffer. Change calls of downstream functions. * iconv/loop.c: Change loop function interface completely. Pass in step and step_data structure. Remove optimization for BODY with NEED_LENGTH_TEST == 0. * iconv/gconv_simple.c: Update interfaces of functions. Insert appropriate error handling code to use transliteration steps. Remove optimization for BODY with NEED_LENGTH_TEST == 0. * iconvdata/8bit-gap.c: Likewise. * iconvdata/8bit-generic.c: Likewise. * iconvdata/ansi_x3.110.c: Likewise. * iconvdata/big5.c: Likewise. * iconvdata/big5hkscs.c: Likewise. * iconvdata/euc-cn.c: Likewise. * iconvdata/euc-jp.c: Likewise. * iconvdata/euc-kr.c: Likewise. * iconvdata/euc-tw.c: Likewise. * iconvdata/gbgbk.c: Likewise. * iconvdata/gbk.c: Likewise. 
* iconvdata/iso-2022-cn.c: Likewise. * iconvdata/iso-2022-jp.c: Likewise. * iconvdata/iso-2022-kr.c: Likewise. * iconvdata/iso646.c: Likewise. * iconvdata/iso8859-1.c: Likewise. * iconvdata/iso_6937-2.c: Likewise. * iconvdata/iso_6937.c: Likewise. * iconvdata/johab.c: Likewise. * iconvdata/sjis.c: Likewise. * iconvdata/t.61.c: Likewise. * iconvdata/uhc.c: Likewise. * iconvdata/unicode.c: Likewise. * iconvdata/utf-16.c: Likewise. * libio/iofwide.c: Adjust to new interface of gconv functions. Use DL_CALL_FCT. * wcsmbs/btowc.c: Likewise. * wcsmbs/mbrtowc.c: Likewise. * wcsmbs/mbsnrtowcs.c: Likewise. * wcsmbs/mbsrtowcs.c: Likewise. * wcsmbs/wcrtomb.c: Likewise. * wcsmbs/wcsnrtombs.c: Likewise. * wcsmbs/wcsrtombs.c: Likewise. * wcsmbs/wctob.c: Likewise. --- iconv/Makefile | 2 +- iconv/gconv.c | 8 +-- iconv/gconv.h | 39 +++++++++++- iconv/gconv_int.h | 48 ++++++++------ iconv/gconv_open.c | 102 ++++++++++++++++++++++++++--- iconv/gconv_simple.c | 177 ++++++++++++++++++++++++++++++++++++++------------- iconv/gconv_trans.c | 50 +++++++++++++++ iconv/loop.c | 129 ++++++++++++++++--------------------- iconv/skeleton.c | 60 ++++++++--------- 9 files changed, 427 insertions(+), 188 deletions(-) create mode 100644 iconv/gconv_trans.c (limited to 'iconv') diff --git a/iconv/Makefile b/iconv/Makefile index 14076e6..6af661c 100644 --- a/iconv/Makefile +++ b/iconv/Makefile @@ -26,7 +26,7 @@ include ../Makeconfig headers = iconv.h gconv.h routines = iconv_open iconv iconv_close \ gconv_open gconv gconv_close gconv_db gconv_conf \ - gconv_builtin gconv_simple + gconv_builtin gconv_simple gconv_trans ifeq ($(elf),yes) routines += gconv_dl else diff --git a/iconv/gconv.c b/iconv/gconv.c index 06e212b..19f9562 100644 --- a/iconv/gconv.c +++ b/iconv/gconv.c @@ -46,8 +46,8 @@ __gconv (__gconv_t cd, const unsigned char **inbuf, if (inbuf == NULL || *inbuf == NULL) /* We just flush. 
*/ result = DL_CALL_FCT (cd->__steps->__fct, - (cd->__steps, cd->__data, NULL, NULL, - irreversible, 1, 0)); + (cd->__steps, cd->__data, NULL, NULL, + cd->__data[0].__outbuf, irreversible, 1, 0)); else { const unsigned char *last_start; @@ -58,8 +58,8 @@ __gconv (__gconv_t cd, const unsigned char **inbuf, { last_start = *inbuf; result = DL_CALL_FCT (cd->__steps->__fct, - (cd->__steps, cd->__data, inbuf, inbufend, - irreversible, 0, 0)); + (cd->__steps, cd->__data, inbuf, inbufend, + cd->__data[0].__outbuf, irreversible, 0, 0)); } while (result == __GCONV_EMPTY_INPUT && last_start != *inbuf && *inbuf + cd->__steps->__min_needed_from <= inbufend); diff --git a/iconv/gconv.h b/iconv/gconv.h index 1821844..5717ddb 100644 --- a/iconv/gconv.h +++ b/iconv/gconv.h @@ -62,18 +62,52 @@ enum struct __gconv_step; struct __gconv_step_data; struct __gconv_loaded_object; +struct __gconv_trans_data; /* Type of a conversion function. */ typedef int (*__gconv_fct) (struct __gconv_step *, struct __gconv_step_data *, __const unsigned char **, __const unsigned char *, - size_t *, int, int); + unsigned char *, size_t *, int, int); /* Constructor and destructor for local data for conversion step. */ typedef int (*__gconv_init_fct) (struct __gconv_step *); typedef void (*__gconv_end_fct) (struct __gconv_step *); +/* Type of a transliteration/transscription function. */ +typedef int (*__gconv_trans_fct) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + __const unsigned char *, + __const unsigned char **, + __const unsigned char *, unsigned char *, + unsigned char **, unsigned char *, size_t *); + +/* Function to call to provide transliteration module with context. */ +typedef int (*__gconv_trans_context_fct) (struct __gconv_trans_data *data, + __const unsigned char *, + __const unsigned char *, + __const unsigned char *, + unsigned char *, unsigned char *, + unsigned char *); + +/* Function to query module about supported encoded character sets. 
*/ +typedef int (*__gconv_trans_query_fct) (__const char **, size_t *); + +/* Constructor and destructor for local data for transliteration. */ +typedef int (*__gconv_trans_init_fct) (void **, const char *); +typedef void (*__gconv_trans_end_fct) (void *); + +struct __gconv_trans_data +{ + /* Transliteration/Transcription function. */ + __gconv_trans_fct __trans_fct; + __gconv_trans_context_fct __trans_context_fct; + __gconv_trans_end_fct __trans_end_fct; + void *__data; +}; + + /* Description of a conversion step. */ struct __gconv_step { @@ -124,6 +158,9 @@ struct __gconv_step_data __mbstate_t *__statep; __mbstate_t __state; /* This element must not be used directly by any module; always use STATEP! */ + + /* Transliteration information. */ + struct __gconv_trans_data __trans; }; diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 01cebe7..87287d7 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -129,8 +129,8 @@ extern struct gconv_module *__gconv_modules_db; /* Return in *HANDLE decriptor for transformation from FROMSET to TOSET. */ -extern int __gconv_open (const char *__toset, const char *__fromset, - __gconv_t *__handle, int flags) +extern int __gconv_open (const char *toset, const char *fromset, + __gconv_t *handle, int flags) internal_function; /* Free resources associated with transformation descriptor CD. */ @@ -141,55 +141,65 @@ extern int __gconv_close (__gconv_t cd) according to rules described by CD and place up to *OUTBYTESLEFT bytes in buffer starting at *OUTBUF. Return number of non-identical conversions in *IRREVERSIBLE if this pointer is not null. 
*/ -extern int __gconv (__gconv_t __cd, const unsigned char **__inbuf, - const unsigned char *inbufend, unsigned char **__outbuf, +extern int __gconv (__gconv_t cd, const unsigned char **inbuf, + const unsigned char *inbufend, unsigned char **outbuf, unsigned char *outbufend, size_t *irreversible) internal_function; /* Return in *HANDLE a pointer to an array with *NSTEPS elements describing the single steps necessary for transformation from FROMSET to TOSET. */ -extern int __gconv_find_transform (const char *__toset, const char *__fromset, - struct __gconv_step **__handle, - size_t *__nsteps, int flags) +extern int __gconv_find_transform (const char *toset, const char *fromset, + struct __gconv_step **handle, + size_t *nsteps, int flags) internal_function; /* Read all the configuration data and cache it. */ extern void __gconv_read_conf (void); /* Comparison function to search alias. */ -extern int __gconv_alias_compare (const void *__p1, const void *__p2); +extern int __gconv_alias_compare (const void *p1, const void *p2); /* Clear reference to transformation step implementations which might cause the code to be unloaded. */ -extern int __gconv_close_transform (struct __gconv_step *__steps, - size_t __nsteps) +extern int __gconv_close_transform (struct __gconv_step *steps, + size_t nsteps) internal_function; /* Load shared object named by NAME. If already loaded increment reference count. */ -extern struct __gconv_loaded_object *__gconv_find_shlib (const char *__name) +extern struct __gconv_loaded_object *__gconv_find_shlib (const char *name) internal_function; /* Release shared object. If no further reference is available unload the object. */ -extern int __gconv_release_shlib (struct __gconv_loaded_object *__handle) +extern int __gconv_release_shlib (struct __gconv_loaded_object *handle) internal_function; /* Fill STEP with information about builtin module with NAME. 
*/ -extern void __gconv_get_builtin_trans (const char *__name, - struct __gconv_step *__step) +extern void __gconv_get_builtin_trans (const char *name, + struct __gconv_step *step) internal_function; +/* Transliteration using the locale's data. */ +extern int gconv_transliterate (struct __gconv_step *step, + struct __gconv_step_data *step_data, + __const unsigned char *inbufstart, + __const unsigned char **inbufp, + __const unsigned char *inbufend, + unsigned char *outbufstart, + unsigned char **outbufp, + unsigned char *outbufend, + size_t *irreversible); /* Builtin transformations. */ #ifdef _LIBC # define __BUILTIN_TRANS(Name) \ - extern int Name (struct __gconv_step *__step, \ - struct __gconv_step_data *__data, \ - const unsigned char **__inbuf, \ - const unsigned char *__inbufend, size_t *__written, \ - int __do_flush, int __consume_incomplete) + extern int Name (struct __gconv_step *step, \ + struct __gconv_step_data *data, \ + const unsigned char **inbuf, \ + const unsigned char *inbufend, unsigned char *outbufstart, \ + size_t *irreversible, int do_flush, int consume_incomplete) __BUILTIN_TRANS (__gconv_transform_ascii_internal); __BUILTIN_TRANS (__gconv_transform_internal_ascii); diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c index da00b1a..984ca9d 100644 --- a/iconv/gconv_open.c +++ b/iconv/gconv_open.c @@ -36,25 +36,65 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, size_t cnt = 0; int res; int conv_flags = 0; - const char *runp; + const char *errhand; - /* Find out whether "IGNORE" is part of the options in the `toset' - name. If yes, remove the string and remember this in the flag. */ - runp = __strchrnul (__strchrnul (toset, '/'), '/'); - if (strcmp (runp, "IGNORE") == 0) + /* Find out whether any error handling method is specified. */ + errhand = strchr (toset, '/'); + if (errhand != NULL) + errhand = strchr (errhand + 1, '/'); + if (__builtin_expect (errhand != NULL, 1)) { - /* Found it. 
This means we should ignore conversion errors. */ - char *newtoset = (char *) alloca (runp - toset + 1); + if (errhand[1] == '\0') + errhand = NULL; + else + { + /* Make copy without the error handling description. */ + char *newtoset = (char *) alloca (errhand - toset + 1); - newtoset[runp - toset] = '\0'; - toset = memcpy (newtoset, toset, runp - toset); + newtoset[errhand - toset] = '\0'; + toset = memcpy (newtoset, toset, errhand - toset); - flags = __GCONV_IGNORE_ERRORS; + flags = __GCONV_IGNORE_ERRORS; + + if (strcasecmp (errhand, "IGNORE") == 0) + { + /* Found it. This means we should ignore conversion errors. */ + flags = __GCONV_IGNORE_ERRORS; + errhand = NULL; + } + } } res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags); if (res == __GCONV_OK) { + const char **csnames = NULL; + size_t ncsnames = 0; + __gconv_trans_fct trans_fct = NULL; + __gconv_trans_context_fct trans_context_fct = NULL; + __gconv_trans_init_fct trans_init_fct = NULL; + __gconv_trans_end_fct trans_end_fct = NULL; + + if (errhand != NULL) + { + /* Find the appropriate transliteration handling. */ + if (strcasecmp (errhand, "TRANSLIT") == 0) + { + /* It's the builtin transliteration handling. We only + suport for it working on the internal encoding. */ + static const char *internal_trans_names[1] = { "INTERNAL" }; + + csnames = internal_trans_names; + ncsnames = 1; + trans_fct = gconv_transliterate; + /* No context, init, or end function. */ + } + else if (strcasecmp (errhand, "WORK AROUND A GCC BUG") == 0) + { + trans_init_fct = (__gconv_trans_init_fct) 1; + } + } + /* Allocate room for handle. */ result = (__gconv_t) malloc (sizeof (struct __gconv_info) + (nsteps @@ -63,6 +103,8 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, res = __GCONV_NOMEM; else { + size_t n; + /* Remember the list of steps. 
*/ result->__steps = steps; result->__nsteps = nsteps; @@ -105,6 +147,26 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, } result->__data[cnt].__outbufend = result->__data[cnt].__outbuf + size; + + /* Now see whether we can use the transliteration module + for this step. */ + for (n = 0; n < ncsnames; ++n) + if (strcasecmp (steps[cnt].__from_name, csnames[n]) == 0) + { + /* Match! Now try the initializer. */ + if (trans_init_fct == NULL + || (trans_init_fct (&result->__data[cnt].__trans.__data, + steps[cnt].__to_name) + == __GCONV_OK)) + { + result->__data[cnt].__trans.__trans_fct = trans_fct; + result->__data[cnt].__trans.__trans_context_fct = + trans_context_fct; + result->__data[cnt].__trans.__trans_end_fct = + trans_end_fct; + } + break; + } } /* Now handle the last entry. */ @@ -116,6 +178,26 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, result->__data[cnt].__internal_use = 0; #endif result->__data[cnt].__statep = &result->__data[cnt].__state; + + /* Now see whether we can use the transliteration module + for this step. */ + for (n = 0; n < ncsnames; ++n) + if (strcasecmp (steps[cnt].__from_name, csnames[n]) == 0) + { + /* Match! Now try the initializer. */ + if (trans_init_fct == NULL + || trans_init_fct (&result->__data[cnt].__trans.__data, + steps[cnt].__to_name) + == __GCONV_OK) + { + result->__data[cnt].__trans.__trans_fct = trans_fct; + result->__data[cnt].__trans.__trans_context_fct = + trans_context_fct; + result->__data[cnt].__trans.__trans_end_fct = + trans_end_fct; + } + break; + } } if (res != __GCONV_OK) diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 4b7004c..188fc04 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -19,6 +19,7 @@ Boston, MA 02111-1307, USA. 
*/ #include +#include #include #include #include @@ -62,9 +63,10 @@ static const unsigned char encoding_byte[] = static inline int -internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend, +internal_ucs4_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { const unsigned char *inptr = *inptrp; @@ -102,10 +104,11 @@ internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend, #ifndef _STRING_ARCH_unaligned static inline int -internal_ucs4_loop_unaligned (const unsigned char **inptrp, +internal_ucs4_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { const unsigned char *inptr = *inptrp; @@ -149,12 +152,14 @@ internal_ucs4_loop_unaligned (const unsigned char **inptrp, static inline int -internal_ucs4_loop_single (const unsigned char **inptrp, +internal_ucs4_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { + mbstate_t *state = step_data->__statep; size_t cnt = state->__count & 7; while (*inptrp < inend && cnt < 4) @@ -205,11 +210,13 @@ internal_ucs4_loop_single (const unsigned char **inptrp, static inline int -ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend, +ucs4_internal_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t 
*irreversible) { + int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; @@ -228,6 +235,10 @@ ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend, if (__builtin_expect (inval, 0) > 0x7fffffff) { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ if (flags & __GCONV_IGNORE_ERRORS) { /* Just ignore this character. */ @@ -259,23 +270,28 @@ ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend, #ifndef _STRING_ARCH_unaligned static inline int -ucs4_internal_loop_unaligned (const unsigned char **inptrp, +ucs4_internal_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { + int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) { if (__builtin_expect (inptr[0], 0) > 0x80) { - /* The value is too large. */ + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ if (flags & __GCONV_IGNORE_ERRORS) { /* Just ignore this character. 
*/ @@ -299,6 +315,7 @@ ucs4_internal_loop_unaligned (const unsigned char **inptrp, outptr[2] = inptr[2]; outptr[3] = inptr[3]; # endif + outptr += 4; } *inptrp = inptr; @@ -318,12 +335,15 @@ ucs4_internal_loop_unaligned (const unsigned char **inptrp, static inline int -ucs4_internal_loop_single (const unsigned char **inptrp, +ucs4_internal_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { + mbstate_t *state = step_data->__statep; + int flags = step_data->__flags; size_t cnt = state->__count & 7; while (*inptrp < inend && cnt < 4) @@ -341,7 +361,10 @@ ucs4_internal_loop_single (const unsigned char **inptrp, if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0], 0) > 0x80) { - /* The value is too large. */ + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. 
*/ if (!(flags & __GCONV_IGNORE_ERRORS)) { *inptrp -= cnt - (state->__count & 7); @@ -386,9 +409,10 @@ ucs4_internal_loop_single (const unsigned char **inptrp, static inline int -internal_ucs4le_loop (const unsigned char **inptrp, const unsigned char *inend, +internal_ucs4le_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { const unsigned char *inptr = *inptrp; @@ -426,10 +450,11 @@ internal_ucs4le_loop (const unsigned char **inptrp, const unsigned char *inend, #ifndef _STRING_ARCH_unaligned static inline int -internal_ucs4le_loop_unaligned (const unsigned char **inptrp, +internal_ucs4le_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { const unsigned char *inptr = *inptrp; @@ -473,12 +498,14 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, static inline int -internal_ucs4le_loop_single (const unsigned char **inptrp, +internal_ucs4le_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { + mbstate_t *state = step_data->__statep; size_t cnt = state->__count & 7; while (*inptrp < inend && cnt < 4) @@ -526,11 +553,13 @@ internal_ucs4le_loop_single (const unsigned char **inptrp, static inline int -ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend, +ucs4le_internal_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char 
*outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { + int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; @@ -549,6 +578,10 @@ ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend, if (__builtin_expect (inval, 0) > 0x7fffffff) { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ if (flags & __GCONV_IGNORE_ERRORS) { /* Just ignore this character. */ @@ -578,12 +611,14 @@ ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend, #ifndef _STRING_ARCH_unaligned static inline int -ucs4le_internal_loop_unaligned (const unsigned char **inptrp, +ucs4le_internal_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { + int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; @@ -594,7 +629,10 @@ ucs4le_internal_loop_unaligned (const unsigned char **inptrp, { if (__builtin_expect (inptr[3], 0) > 0x80) { - /* The value is too large. */ + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ if (flags & __GCONV_IGNORE_ERRORS) { /* Just ignore this character. 
*/ @@ -639,12 +677,15 @@ ucs4le_internal_loop_unaligned (const unsigned char **inptrp, static inline int -ucs4le_internal_loop_single (const unsigned char **inptrp, +ucs4le_internal_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible) { + mbstate_t *state = step_data->__statep; + int flags = step_data->__flags; size_t cnt = state->__count & 7; while (*inptrp < inend && cnt < 4) @@ -662,7 +703,10 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3], 0) > 0x80) { - /* The value is too large. */ + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ if (!(flags & __GCONV_IGNORE_ERRORS)) return __GCONV_ILLEGAL_INPUT; } @@ -710,6 +754,10 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, { \ if (__builtin_expect (*inptr, 0) > '\x7f') \ { \ + /* The value is too large. We don't try transliteration here since \ + this is not an error because of the lack of possibilities to \ + represent the result. This is a genuine bug in the input since \ + ASCII does not allow such values. */ \ if (! ignore_errors_p ()) \ { \ /* This is no correct ANSI_X3.4-1968 character. */ \ @@ -718,13 +766,14 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, } \ \ ++*irreversible; \ - ++inptr; \ + ++inptr; \ } \ else \ /* It's an one byte sequence. */ \ /* XXX unaligned. 
*/ \ *((uint32_t *) outptr)++ = *inptr++; \ } +#define LOOP_NEED_FLAGS #include #include @@ -740,6 +789,13 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, #define FUNCTION_NAME __gconv_transform_internal_ascii #define ONE_DIRECTION 1 +extern int FUNCTION_NAME (struct __gconv_step *step, + struct __gconv_step_data *data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char *outbufstart, size_t *irreversible, + int do_flush, int consume_incomplete); + #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO #define LOOPFCT FROM_LOOP @@ -748,20 +804,31 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, /* XXX unaligned. */ \ if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \ { \ - if (! ignore_errors_p ()) \ + if (step_data->__trans.__trans_fct != NULL) \ + { \ + result = DL_CALL_FCT (step_data->__trans.__trans_fct, \ + (step, step_data, *inptrp, &inptr, inend, \ + *outptrp, &outptr, outend, irreversible)); \ + if (result != __GCONV_OK) \ + break; \ + } \ + else if (! ignore_errors_p ()) \ { \ /* This is no correct ANSI_X3.4-1968 character. */ \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ - \ - ++*irreversible; \ - inptr += 4; \ + else \ + { \ + ++*irreversible; \ + inptr += 4; \ + } \ } \ else \ /* It's an one byte sequence. */ \ *outptr++ = *((uint32_t *) inptr)++; \ } +#define LOOP_NEED_FLAGS #include #include @@ -916,7 +983,7 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, continue; \ } \ \ - if (NEED_LENGTH_TEST && __builtin_expect (inptr + cnt > inend, 0)) \ + if (__builtin_expect (inptr + cnt > inend, 0)) \ { \ /* We don't have enough input. But before we report that check \ that all the bytes are correct. */ \ @@ -979,6 +1046,7 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, /* Now adjust the pointers and store the result. 
*/ \ *((uint32_t *) outptr)++ = ch; \ } +#define LOOP_NEED_FLAGS #define STORE_REST \ { \ @@ -1125,18 +1193,29 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, { \ if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000) \ { \ - if (! ignore_errors_p ()) \ + if (step_data->__trans.__trans_fct != NULL) \ + { \ + result = DL_CALL_FCT (step_data->__trans.__trans_fct, \ + (step, step_data, *inptrp, &inptr, inend, \ + *outptrp, &outptr, outend, irreversible)); \ + if (result != __GCONV_OK) \ + break; \ + } \ + else if (! ignore_errors_p ()) \ { \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ - \ - inptr += 4; \ - ++*irreversible; \ + else \ + { \ + inptr += 4; \ + ++*irreversible; \ + } \ } \ else \ *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \ } +#define LOOP_NEED_FLAGS #include #include @@ -1181,17 +1260,29 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, uint32_t val = *((uint32_t *) inptr); \ if (__builtin_expect (val, 0) >= 0x10000) \ { \ - if (! ignore_errors_p ()) \ + if (step_data->__trans.__trans_fct != NULL) \ + { \ + result = DL_CALL_FCT (step_data->__trans.__trans_fct, \ + (step, step_data, *inptrp, &inptr, inend, \ + *outptrp, &outptr, outend, irreversible)); \ + if (result != __GCONV_OK) \ + break; \ + } \ + else if (! ignore_errors_p ()) \ { \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ - \ - inptr += 4; \ - ++*irreversible; \ + else \ + { \ + inptr += 4; \ + ++*irreversible; \ + } \ + continue; \ } \ *((uint16_t *) outptr)++ = bswap_16 (val); \ inptr += 4; \ } +#define LOOP_NEED_FLAGS #include #include diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c new file mode 100644 index 0000000..11c542e --- /dev/null +++ b/iconv/gconv_trans.c @@ -0,0 +1,50 @@ +/* Transliteration using the locale's data. + Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2000. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <stdint.h> + +#include "gconv_int.h" +#include "../locale/localeinfo.h" + + +int +gconv_transliterate (struct __gconv_step *step, + struct __gconv_step_data *step_data, + __const unsigned char *inbufstart, + __const unsigned char **inbufp, + __const unsigned char *inbufend, + unsigned char *outbufstart, + unsigned char **outbufp, unsigned char *outbufend, + size_t *irreversible) +{ + /* Find out about the locale's transliteration. */ + uint_fast32_t size = _NL_CURRENT_WORD (LC_CTYPE, + _NL_CTYPE_TRANSLIT_HASH_SIZE); + uint_fast32_t layers = _NL_CURRENT_WORD (LC_CTYPE, + _NL_CTYPE_TRANSLIT_HASH_LAYERS); + + /* If there is no transliteration information in the locale don't do + anything and return the error. */ + if (size == 0) + return __GCONV_ILLEGAL_INPUT; + + /* XXX For now we don't do anything. */ + return __GCONV_ILLEGAL_INPUT; +} diff --git a/iconv/loop.c b/iconv/loop.c index c01e520..ebbc136 100644 --- a/iconv/loop.c +++ b/iconv/loop.c @@ -175,88 +175,57 @@ /* The function returns the status, as defined in gconv.h. 
*/ static inline int -FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, +FCTNAME (LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible EXTRA_LOOP_DECLS) { - int result = __GCONV_OK; +#ifdef LOOP_NEED_STATE + mbstate_t *state = step_data->__statep; +#endif +#ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +#endif +#ifdef LOOP_NEED_DATA + void *data = step->__data; +#endif + int result = __GCONV_EMPTY_INPUT; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - /* We run one loop where we avoid checks for underflow/overflow of the - buffers to speed up the conversion a bit. */ - size_t min_in_rounds = (inend - inptr) / MAX_NEEDED_INPUT; - size_t min_out_rounds = (outend - outptr) / MAX_NEEDED_OUTPUT; - size_t min_rounds = MIN (min_in_rounds, min_out_rounds); - #ifdef INIT_PARAMS INIT_PARAMS; #endif -#undef NEED_LENGTH_TEST -#define NEED_LENGTH_TEST 0 - while (min_rounds-- > 0) + while (inptr != inend) { - /* Here comes the body the user provides. It can stop with RESULT - set to GCONV_INCOMPLETE_INPUT (if the size of the input characters - vary in size), GCONV_ILLEGAL_INPUT, or GCONV_FULL_OUTPUT (if the - output characters vary in size. */ - BODY - } - - if (result == __GCONV_OK) - { -#if MIN_NEEDED_INPUT == MAX_NEEDED_INPUT \ - && MIN_NEEDED_OUTPUT == MAX_NEEDED_OUTPUT - /* We don't need to start another loop since we were able to determine - the maximal number of characters to copy in advance. What remains - to be determined is the status. */ - if (inptr == inend) - /* No more input. */ - result = __GCONV_EMPTY_INPUT; - else if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) - || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) - /* Overflow in the output buffer. 
*/ - result = __GCONV_FULL_OUTPUT; - else - /* We have something left in the input buffer. */ - result = __GCONV_INCOMPLETE_INPUT; -#else - result = __GCONV_EMPTY_INPUT; - -# undef NEED_LENGTH_TEST -# define NEED_LENGTH_TEST 1 - while (inptr != inend) + /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the + compiler generating better code. It will optimized away + since MIN_NEEDED_OUTPUT is always a constant. */ + if ((MIN_NEEDED_OUTPUT != 1 + && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0)) + || (MIN_NEEDED_OUTPUT == 1 + && __builtin_expect (outptr >= outend, 0))) + { + /* Overflow in the output buffer. */ + result = __GCONV_FULL_OUTPUT; + break; + } + if (MIN_NEEDED_INPUT > 1 + && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0)) { - /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the - compiler generating better code. It will optimized away - since MIN_NEEDED_OUTPUT is always a constant. */ - if ((MIN_NEEDED_OUTPUT != 1 - && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0)) - || (MIN_NEEDED_OUTPUT == 1 - && __builtin_expect (outptr >= outend, 0))) - { - /* Overflow in the output buffer. */ - result = __GCONV_FULL_OUTPUT; - break; - } - if (MIN_NEEDED_INPUT > 1 - && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0)) - { - /* We don't have enough input for another complete input - character. */ - result = __GCONV_INCOMPLETE_INPUT; - break; - } - - /* Here comes the body the user provides. It can stop with - RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the - input characters vary in size), GCONV_ILLEGAL_INPUT, or - GCONV_FULL_OUTPUT (if the output characters vary in size). */ - BODY + /* We don't have enough input for another complete input + character. */ + result = __GCONV_INCOMPLETE_INPUT; + break; } -#endif /* Input and output charset are not both fixed width. */ + + /* Here comes the body the user provides. 
It can stop with + RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the + input characters vary in size), GCONV_ILLEGAL_INPUT, or + GCONV_FULL_OUTPUT (if the output characters vary in size). */ + BODY } /* Update the pointers pointed to by the parameters. */ @@ -291,11 +260,19 @@ FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, # define SINGLE(fct) SINGLE2 (fct) # define SINGLE2(fct) fct##_single static inline int -SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, +SINGLE(LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, int flags, void *data, size_t *irreversible - EXTRA_LOOP_DECLS) + size_t *irreversible EXTRA_LOOP_DECLS) { + mbstate_t *state = step_data->__statep; +#ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +#endif +#ifdef LOOP_NEED_DATA + void *data = step->__data; +#endif int result = __GCONV_OK; unsigned char bytebuf[MAX_NEEDED_INPUT]; const unsigned char *inptr = *inptrp; @@ -347,8 +324,7 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, inptr = bytebuf; inend = &bytebuf[inlen]; -#undef NEED_LENGTH_TEST -#define NEED_LENGTH_TEST 1 + do { BODY @@ -410,9 +386,12 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, #undef EXTRA_LOOP_DECLS #undef INIT_PARAMS #undef UPDATE_PARAMS +#undef UNPACK_BYTES +#undef LOOP_NEED_STATE +#undef LOOP_NEED_FLAGS +#undef LOOP_NEED_DATA #undef get16 #undef get32 #undef put16 #undef put32 #undef unaligned -#undef UNPACK_BYTES diff --git a/iconv/skeleton.c b/iconv/skeleton.c index 9b7b4a1..dca2c7f 100644 --- a/iconv/skeleton.c +++ b/iconv/skeleton.c @@ -271,7 +271,8 @@ gconv_init (struct __gconv_step *step) int FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, const unsigned char **inptrp, const unsigned char *inend, - size_t 
*irreversible, int do_flush, int consume_incomplete) + unsigned char *outbufstart, size_t *irreversible, int do_flush, + int consume_incomplete) { struct __gconv_step *next_step = step + 1; struct __gconv_step_data *next_data = data + 1; @@ -295,13 +296,14 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, successfully emitted the escape sequence. */ if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST)) status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, - irreversible, 1, consume_incomplete)); + next_data->__outbuf, irreversible, 1, + consume_incomplete)); } else { /* We preserve the initial values of the pointer variables. */ const unsigned char *inptr = *inptrp; - unsigned char *outbuf = data->__outbuf; + unsigned char *outbuf = outbufstart; unsigned char *outend = data->__outbufend; unsigned char *outstart; /* This variable is used to count the number of characters we @@ -333,19 +335,16 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, # if MAX_NEEDED_FROM > 1 if (MAX_NEEDED_TO == 1 || FROM_DIRECTION) - status = SINGLE(FROM_LOOP) (inptrp, inend, &outbuf, outend, - data->__statep, data->__flags, - step->__data, &lirreversible + status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf, + outend, &lirreversible EXTRA_LOOP_ARGS); # endif # if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION else # endif # if MAX_NEEDED_TO > 1 && !ONE_DIRECTION - status = SINGLE(TO_LOOP) (inptrp, inend, &outbuf, outend, - data->__statep, data->__flags, - step->__data, &lirreversible - EXTRA_LOOP_ARGS); + status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf, + outend, &lirreversible EXTRA_LOOP_ARGS); # endif if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) @@ -386,16 +385,12 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, { if (FROM_DIRECTION) /* Run the conversion loop. 
*/ - status = FROM_LOOP (inptrp, inend, &outbuf, outend, - data->__statep, data->__flags, - step->__data, &lirreversible - EXTRA_LOOP_ARGS); + status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend, + &lirreversible EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ - status = TO_LOOP (inptrp, inend, &outbuf, outend, - data->__statep, data->__flags, - step->__data, &lirreversible - EXTRA_LOOP_ARGS); + status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend, + &lirreversible EXTRA_LOOP_ARGS); } #if !defined _STRING_ARCH_unaligned \ && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ @@ -404,18 +399,14 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, { if (FROM_DIRECTION) /* Run the conversion loop. */ - status = GEN_unaligned (FROM_LOOP) (inptrp, inend, &outbuf, - outend, data->__statep, - data->__flags, - step->__data, + status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend, + &outbuf, outend, &lirreversible EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ - status = GEN_unaligned (TO_LOOP) (inptrp, inend, &outbuf, - outend, data->__statep, - data->__flags, - step->__data, + status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend, + &outbuf, outend, &lirreversible EXTRA_LOOP_ARGS); } @@ -445,7 +436,8 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, int result; result = DL_CALL_FCT (fct, (next_step, next_data, &outerr, - outbuf, irreversible, 0, + outbuf, next_data->__outbuf, + irreversible, 0, consume_incomplete)); if (result != __GCONV_EMPTY_INPUT) @@ -471,22 +463,20 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, /* XXX Handle unaligned access here as well. */ if (FROM_DIRECTION) /* Run the conversion loop. 
*/ - nstatus = FROM_LOOP ((const unsigned char **) inptrp, + nstatus = FROM_LOOP (step, data, + (const unsigned char **) inptrp, (const unsigned char *) inend, (unsigned char **) &outbuf, (unsigned char *) outerr, - data->__statep, data->__flags, - step->__data, &lirreversible - EXTRA_LOOP_ARGS); + &lirreversible EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ - nstatus = TO_LOOP ((const unsigned char **) inptrp, + nstatus = TO_LOOP (step, data, + (const unsigned char **) inptrp, (const unsigned char *) inend, (unsigned char **) &outbuf, (unsigned char *) outerr, - data->__statep, data->__flags, - step->__data, &lirreversible - EXTRA_LOOP_ARGS); + &lirreversible EXTRA_LOOP_ARGS); /* We must run out of output buffer space in this rerun. */ -- cgit v1.1