diff options
45 files changed, 543 insertions, 163 deletions
@@ -1,3 +1,72 @@ +2002-11-30 Bruno Haible <bruno@clisp.org> + + * iconv/gconv.h (__gconv_btowc_fct): New typedef. + (struct __gconv_step): New field __btowc_fct. + * wcsmbs/btowc.c (__btowc): Use the __btowc_fct shortcut if possible. + * iconv/gconv_int.h (__BUILTIN_TRANSFORM): Renamed from + __BUILTIN_TRANS. + (__gconv_btwoc_ascii): New declaration. + * iconv/gconv_simple.c (BUILTIN_TRANSFORMATION): Add BtowcFct argument. + (__gconv_btwoc_ascii): New function. + * iconv/gconv_builtin.h: Add BtowcFct argument to all + BUILTIN_TRANSFORMATION invocations. + * iconv/gconv_conf.c (BUILTIN_TRANSFORMATION): Add BtowcFct argument. + * iconv/iconvconfig.c (BUILTIN_TRANSFORMATION): Likewise. + * iconv/gconv_builtin.c (map): New field btowc_fct. + (BUILTIN_TRANSFORMATION): Add BtowcFct argument. Use it to initialize + btowc_fct field. + (__gconv_get_builtin_trans): Initialize __btowc_fct field. + * iconv/gconv_cache.c (find_module): Initialize __btowc_fct field. + * iconv/gconv_db.c (gen_steps, increment_counter): Likewise. + * wcsmbs/wcsmbsload.c (to_wc, to_mb): Likewise. + * iconv/skeleton.c: Document STORE_REST and FROM_ONEBYTE. + (gconv_init): Initialize __btowc_fct field. + Undefine EXTRA_LOOP_ARGS and FROM_ONEBYTE at the end. + * iconv/loop.c: Document ONEBYTE_BODY. + (gconv_btowc, FROM_ONEBYTE): Define if ONEBYTE_BODY is defined. + Undefine ONEBYTE_BODY at the end. + * iconvdata/8bit-generic.c (ONEBYTE_BODY): New macro. + * iconvdata/8bit-gap.c (NONNUL): New macro. + (BODY for FROM_LOOP): Use it. + (ONEBYTE_BODY): New macro. + * iconvdata/isiri-3342.c (HAS_HOLES): Set to 1. + (NONNUL): New macro. + * iconvdata/ansi_x3.110.c (ONEBYTE_BODY): New macro. + * iconvdata/armscii-8.c (ONEBYTE_BODY): New macro. + * iconvdata/cp1255.c (ONEBYTE_BODY): New macro. + * iconvdata/cp1258.c (ONEBYTE_BODY): New macro. + * iconvdata/tcvn5712-1.c (ONEBYTE_BODY): New macro. + * iconvdata/big5.c (ONEBYTE_BODY): New macro. + * iconvdata/big5hkscs.c (ONEBYTE_BODY): New macro. 
+ * iconvdata/euc-cn.c (ONEBYTE_BODY): New macro. + * iconvdata/euc-jp.c (ONEBYTE_BODY): New macro. + * iconvdata/euc-jisx0213.c (ONEBYTE_BODY): New macro. + * iconvdata/euc-kr.c (ONEBYTE_BODY): New macro. + * iconvdata/euc-tw.c (ONEBYTE_BODY): New macro. + * iconvdata/gbk.c (ONEBYTE_BODY): New macro. + * iconvdata/gb18030.c (ONEBYTE_BODY): New macro. + * iconvdata/ibm932.c: Include <stdbool.h>. + (TRUE, FALSE): Remove macros. + (BODY for FROM_LOOP): Remove unused variable rp1. + (ONEBYTE_BODY): New macro. + (BODY for TO_LOOP): Use bool. + * iconvdata/ibm932.h (__ibm932sb_to_ucs4_idx): Remove array. + * iconvdata/ibm943.c: Include <stdbool.h>. + (TRUE, FALSE): Remove macros. + (BODY for FROM_LOOP): Remove unused variable rp1. + (ONEBYTE_BODY): New macro. + (BODY for TO_LOOP): Use bool. + * iconvdata/ibm943.h (__ibm943sb_to_ucs4_idx): Remove array. + * iconvdata/iso8859-1.c (ONEBYTE_BODY): New macro. + * iconvdata/iso_6937-2.c (ONEBYTE_BODY): New macro. + * iconvdata/iso_6937.c (ONEBYTE_BODY): New macro. + * iconvdata/johab.c (ONEBYTE_BODY): New macro. + * iconvdata/sjis.c (ONEBYTE_BODY): New macro. + * iconvdata/shift_jisx0213.c (ONEBYTE_BODY): New macro. + * iconvdata/t.61.c (ONEBYTE_BODY): New macro. + * iconvdata/uhc.c (ONEBYTE_BODY): New macro. + * iconvdata/gbbig5.c: Tweak comment. + 2002-12-02 Ulrich Drepper <drepper@redhat.com> * po/fi.po: Update from translation team. @@ -1,4 +1,4 @@ -GNU C Library NEWS -- history of user-visible changes. 2002-11-5 +GNU C Library NEWS -- history of user-visible changes. 2002-12-2 Copyright (C) 1992-2001, 2002 Free Software Foundation, Inc. See the end for copying conditions. @@ -17,6 +17,9 @@ Version 2.3.2 This normally expands to lib, but on some 64-bit platforms to lib64 instead. * fexecve is implemented on Linux. + +* the btowc() function should work 2+ times faster due to specialized + callbacks in the iconv modules. Implemented by Bruno Haible. 
Version 2.3 diff --git a/iconv/gconv_builtin.c b/iconv/gconv_builtin.c index 45bd4e7..f653d6c 100644 --- a/iconv/gconv_builtin.c +++ b/iconv/gconv_builtin.c @@ -1,5 +1,5 @@ /* Table for builtin transformation mapping. - Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1997-1999, 2000-2002 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -31,6 +31,7 @@ static struct builtin_map { const char *name; __gconv_fct fct; + __gconv_btowc_fct btowc_fct; int min_needed_from; int max_needed_from; @@ -39,11 +40,12 @@ static struct builtin_map } map[] = { -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ - MinT, MaxT) \ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ { \ .name = Name, \ .fct = Fct, \ + .btowc_fct = BtowcFct, \ \ .min_needed_from = MinF, \ .max_needed_from = MaxF, \ @@ -69,6 +71,7 @@ __gconv_get_builtin_trans (const char *name, struct __gconv_step *step) assert (cnt < sizeof (map) / sizeof (map[0])); step->__fct = map[cnt].fct; + step->__btowc_fct = map[cnt].btowc_fct; step->__init_fct = NULL; step->__end_fct = NULL; step->__shlib_handle = NULL; diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h index 1a9d8a8..bd34c25 100644 --- a/iconv/gconv_builtin.h +++ b/iconv/gconv_builtin.h @@ -1,5 +1,5 @@ /* Builtin transformations. - Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1997-1999, 2000-2002 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
@@ -30,14 +30,14 @@ BUILTIN_ALIAS ("OSF00010105//", "ISO-10646/UCS4/") /* level 2 */ BUILTIN_ALIAS ("OSF00010106//", "ISO-10646/UCS4/") /* level 3 */ BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4", - __gconv_transform_internal_ucs4, 4, 4, 4, 4) + __gconv_transform_internal_ucs4, NULL, 4, 4, 4, 4) BUILTIN_TRANSFORMATION ("ISO-10646/UCS4/", "INTERNAL", 1, "=ucs4->INTERNAL", - __gconv_transform_ucs4_internal, 4, 4, 4, 4) + __gconv_transform_ucs4_internal, NULL, 4, 4, 4, 4) BUILTIN_TRANSFORMATION ("INTERNAL", "UCS-4LE//", 1, "=INTERNAL->ucs4le", - __gconv_transform_internal_ucs4le, 4, 4, 4, 4) + __gconv_transform_internal_ucs4le, NULL, 4, 4, 4, 4) BUILTIN_TRANSFORMATION ("UCS-4LE//", "INTERNAL", 1, "=ucs4le->INTERNAL", - __gconv_transform_ucs4le_internal, 4, 4, 4, 4) + __gconv_transform_ucs4le_internal, NULL, 4, 4, 4, 4) BUILTIN_ALIAS ("WCHAR_T//", "INTERNAL") @@ -48,10 +48,11 @@ BUILTIN_ALIAS ("OSF05010001//", "ISO-10646/UTF8/") BUILTIN_ALIAS ("ISO-10646/UTF-8/", "ISO-10646/UTF8/") BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UTF8/", 1, "=INTERNAL->utf8", - __gconv_transform_internal_utf8, 4, 4, 1, 6) + __gconv_transform_internal_utf8, NULL, 4, 4, 1, 6) BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "INTERNAL", 1, "=utf8->INTERNAL", - __gconv_transform_utf8_internal, 1, 6, 4, 4) + __gconv_transform_utf8_internal, __gconv_btwoc_ascii, + 1, 6, 4, 4) BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/") BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/") @@ -60,10 +61,10 @@ BUILTIN_ALIAS ("OSF00010101//", "ISO-10646/UCS2/") /* level 2 */ BUILTIN_ALIAS ("OSF00010102//", "ISO-10646/UCS2/") /* level 3 */ BUILTIN_TRANSFORMATION ("ISO-10646/UCS2/", "INTERNAL", 1, "=ucs2->INTERNAL", - __gconv_transform_ucs2_internal, 2, 2, 4, 4) + __gconv_transform_ucs2_internal, NULL, 2, 2, 4, 4) BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS2/", 1, "=INTERNAL->ucs2", - __gconv_transform_internal_ucs2, 4, 4, 2, 2) + __gconv_transform_internal_ucs2, NULL, 4, 4, 2, 2) 
BUILTIN_ALIAS ("ANSI_X3.4//", "ANSI_X3.4-1968//") @@ -80,10 +81,11 @@ BUILTIN_ALIAS ("CSASCII//", "ANSI_X3.4-1968//") BUILTIN_ALIAS ("OSF00010020//", "ANSI_X3.4-1968//") BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "INTERNAL", 1, "=ascii->INTERNAL", - __gconv_transform_ascii_internal, 4, 4, 1, 1) + __gconv_transform_ascii_internal, __gconv_btwoc_ascii, + 4, 4, 1, 1) BUILTIN_TRANSFORMATION ("INTERNAL", "ANSI_X3.4-1968//", 1, "=INTERNAL->ascii", - __gconv_transform_internal_ascii, 4, 4, 1, 1) + __gconv_transform_internal_ascii, NULL, 4, 4, 1, 1) #if BYTE_ORDER == BIG_ENDIAN @@ -94,11 +96,13 @@ BUILTIN_ALIAS ("UCS-2LE//", "UNICODELITTLE//") BUILTIN_TRANSFORMATION ("UNICODELITTLE//", "INTERNAL", 1, "=ucs2reverse->INTERNAL", - __gconv_transform_ucs2reverse_internal, 2, 2, 4, 4) + __gconv_transform_ucs2reverse_internal, NULL, + 2, 2, 4, 4) BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODELITTLE//", 1, "=INTERNAL->ucs2reverse", - __gconv_transform_internal_ucs2reverse, 4, 4, 2, 2) + __gconv_transform_internal_ucs2reverse, NULL, + 4, 4, 2, 2) #else BUILTIN_ALIAS ("UNICODELITTLE//", "ISO-10646/UCS2/") BUILTIN_ALIAS ("UCS-2LE//", "ISO-10646/UCS2/") @@ -107,9 +111,11 @@ BUILTIN_ALIAS ("UCS-2BE//", "UNICODEBIG//") BUILTIN_TRANSFORMATION ("UNICODEBIG//", "INTERNAL", 1, "=ucs2reverse->INTERNAL", - __gconv_transform_ucs2reverse_internal, 2, 2, 4, 4) + __gconv_transform_ucs2reverse_internal, NULL, + 2, 2, 4, 4) BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1, "=INTERNAL->ucs2reverse", - __gconv_transform_internal_ucs2reverse, 4, 4, 2, 2) + __gconv_transform_internal_ucs2reverse, NULL, + 4, 4, 2, 2) #endif diff --git a/iconv/gconv_cache.c b/iconv/gconv_cache.c index 8f92cba..882acc6 100644 --- a/iconv/gconv_cache.c +++ b/iconv/gconv_cache.c @@ -201,7 +201,11 @@ find_module (const char *directory, const char *filename, result->__init_fct = result->__shlib_handle->init_fct; result->__end_fct = result->__shlib_handle->end_fct; + /* These settings can be overridden by the init 
function. */ + result->__btowc_fct = NULL; result->__data = NULL; + + /* Call the init function. */ if (result->__init_fct != NULL) status = DL_CALL_FCT (result->__init_fct, (result)); } diff --git a/iconv/gconv_conf.c b/iconv/gconv_conf.c index cd5055c..1262e76 100644 --- a/iconv/gconv_conf.c +++ b/iconv/gconv_conf.c @@ -61,8 +61,8 @@ static const char gconv_module_ext[] = MODULE_EXT; /* We have a few builtin transformations. */ static struct gconv_module builtin_modules[] = { -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ - MinT, MaxT) \ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ { \ from_string: From, \ to_string: To, \ @@ -73,18 +73,21 @@ static struct gconv_module builtin_modules[] = #define BUILTIN_ALIAS(From, To) #include "gconv_builtin.h" -}; #undef BUILTIN_TRANSFORMATION #undef BUILTIN_ALIAS +}; static const char *builtin_aliases[] = { -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ - MinT, MaxT) +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) #define BUILTIN_ALIAS(From, To) From " " To, #include "gconv_builtin.h" + +#undef BUILTIN_TRANSFORMATION +#undef BUILTIN_ALIAS }; #ifdef USE_IN_LIBIO diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c index 70c33df..020b556 100644 --- a/iconv/gconv_db.c +++ b/iconv/gconv_db.c @@ -268,6 +268,9 @@ gen_steps (struct derivation_step *best, const char *toset, result[step_cnt].__init_fct = shlib_handle->init_fct; result[step_cnt].__end_fct = shlib_handle->end_fct; + /* These settings can be overridden by the init function. */ + result[step_cnt].__btowc_fct = NULL; + /* Call the init function. 
*/ if (result[step_cnt].__init_fct != NULL) { @@ -353,8 +356,12 @@ increment_counter (struct __gconv_step *steps, size_t nsteps) step->__fct = step->__shlib_handle->fct; step->__init_fct = step->__shlib_handle->init_fct; step->__end_fct = step->__shlib_handle->end_fct; + + /* These settings can be overridden by the init function. */ + step->__btowc_fct = NULL; } + /* Call the init function. */ if (step->__init_fct != NULL) DL_CALL_FCT (step->__init_fct, (step)); } diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 1ac878d..dec29d9c 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -264,7 +264,7 @@ extern int __gconv_transliterate (struct __gconv_step *step, /* Builtin transformations. */ #ifdef _LIBC -# define __BUILTIN_TRANS(Name) \ +# define __BUILTIN_TRANSFORM(Name) \ extern int Name (struct __gconv_step *step, \ struct __gconv_step_data *data, \ const unsigned char **inbuf, \ @@ -272,21 +272,25 @@ extern int __gconv_transliterate (struct __gconv_step *step, unsigned char **outbufstart, size_t *irreversible, \ int do_flush, int consume_incomplete) -__BUILTIN_TRANS (__gconv_transform_ascii_internal); -__BUILTIN_TRANS (__gconv_transform_internal_ascii); -__BUILTIN_TRANS (__gconv_transform_utf8_internal); -__BUILTIN_TRANS (__gconv_transform_internal_utf8); -__BUILTIN_TRANS (__gconv_transform_ucs2_internal); -__BUILTIN_TRANS (__gconv_transform_internal_ucs2); -__BUILTIN_TRANS (__gconv_transform_ucs2reverse_internal); -__BUILTIN_TRANS (__gconv_transform_internal_ucs2reverse); -__BUILTIN_TRANS (__gconv_transform_internal_ucs4); -__BUILTIN_TRANS (__gconv_transform_ucs4_internal); -__BUILTIN_TRANS (__gconv_transform_internal_ucs4le); -__BUILTIN_TRANS (__gconv_transform_ucs4le_internal); -__BUILTIN_TRANS (__gconv_transform_internal_utf16); -__BUILTIN_TRANS (__gconv_transform_utf16_internal); -# undef __BUITLIN_TRANS +__BUILTIN_TRANSFORM (__gconv_transform_ascii_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ascii); +__BUILTIN_TRANSFORM 
(__gconv_transform_utf8_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_utf8); +__BUILTIN_TRANSFORM (__gconv_transform_ucs2_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs2); +__BUILTIN_TRANSFORM (__gconv_transform_ucs2reverse_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs2reverse); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4); +__BUILTIN_TRANSFORM (__gconv_transform_ucs4_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le); +__BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_utf16); +__BUILTIN_TRANSFORM (__gconv_transform_utf16_internal); +# undef __BUILTIN_TRANSFORM + +/* Specialized conversion function for a single byte to INTERNAL, recognizing + only ASCII characters. */ +extern wint_t __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c); #endif diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 35346aa..3937b95 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -28,10 +28,11 @@ #include <string.h> #include <wchar.h> #include <sys/param.h> +#include <gconv_int.h> #define BUILTIN_ALIAS(s1, s2) /* nothing */ -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ - MinT, MaxT) \ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \ __const unsigned char **, __const unsigned char *, \ unsigned char **, size_t *, int, int); @@ -43,6 +44,18 @@ #endif +/* Specialized conversion function for a single byte to INTERNAL, recognizing + only ASCII characters. */ +wint_t +__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c) +{ + if (c < 0x80) + return c; + else + return WEOF; +} + + /* Transform from the internal, UCS4-like format, to UCS4. The difference between the internal ucs4 format and the real UCS4 format is, if any, the endianess. 
The Unicode/ISO 10646 says that diff --git a/iconv/iconvconfig.c b/iconv/iconvconfig.c index 62e3e4e..a3c0a4b 100644 --- a/iconv/iconvconfig.c +++ b/iconv/iconvconfig.c @@ -201,8 +201,8 @@ static struct { #define BUILTIN_ALIAS(alias, real) \ { .from = alias, .to = real }, -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ - MinT, MaxT) +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) #include <gconv_builtin.h> }; #undef BUILTIN_ALIAS @@ -218,11 +218,13 @@ static struct } builtin_trans[] = { #define BUILTIN_ALIAS(alias, real) -#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ - MinT, MaxT) \ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ { .from = From, .to = To, .module = Name, .cost = Cost }, #include <gconv_builtin.h> }; +#undef BUILTIN_ALIAS +#undef BUILTIN_TRANSFORMATION #define nbuiltin_trans (sizeof (builtin_trans) / sizeof (builtin_trans[0])) diff --git a/iconv/loop.c b/iconv/loop.c index deb0173..b0b76f3 100644 --- a/iconv/loop.c +++ b/iconv/loop.c @@ -43,6 +43,9 @@ INIT_PARAMS code to define and initialize variables from params. UPDATE_PARAMS code to store result in params. + + ONEBYTE_BODY body of the specialized conversion function for a + single byte from the current character set to INTERNAL. */ #include <assert.h> @@ -453,6 +456,15 @@ SINGLE(LOOPFCT) (struct __gconv_step *step, #endif +#ifdef ONEBYTE_BODY +/* Define the shortcut function for btowc. */ +static wint_t +gconv_btowc (struct __gconv_step *step, unsigned char c) + ONEBYTE_BODY +# define FROM_ONEBYTE gconv_btowc +#endif + + /* We remove the macro definitions so that we can include this file again for the definition of another function. 
*/ #undef MIN_NEEDED_INPUT @@ -465,6 +477,7 @@ SINGLE(LOOPFCT) (struct __gconv_step *step, #undef EXTRA_LOOP_DECLS #undef INIT_PARAMS #undef UPDATE_PARAMS +#undef ONEBYTE_BODY #undef UNPACK_BYTES #undef LOOP_NEED_STATE #undef LOOP_NEED_FLAGS diff --git a/iconv/skeleton.c b/iconv/skeleton.c index edcd92e..579426c 100644 --- a/iconv/skeleton.c +++ b/iconv/skeleton.c @@ -101,6 +101,26 @@ EXTRA_LOOP_ARGS optional macro specifying extra arguments passed to loop function. + STORE_REST optional, needed only when MAX_NEEDED_FROM > 4. + This macro stores the seen but unconverted input bytes + in the state. + + FROM_ONEBYTE optional. If defined, should be the name of a + specialized conversion function for a single byte + from the current character set to INTERNAL. This + function has prototype + wint_t + FROM_ONEBYTE (struct __gconv_step *, unsigned char); + and does a special conversion: + - The input is a single byte. + - The output is a single uint32_t. + - The state before the conversion is the initial state; + the state after the conversion is irrelevant. + - No transliteration. + - __invocation_counter = 0. + - __internal_use = 1. + - do_flush = 0. 
+ Modules can use mbstate_t to store conversion state as follows: * Bits 2..0 of '__count' contain the number of lookahead input bytes @@ -315,6 +335,10 @@ gconv_init (struct __gconv_step *step) step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM; step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO; step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO; + +#ifdef FROM_ONEBYTE + step->__btowc_fct = FROM_ONEBYTE; +#endif } else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0) { @@ -796,10 +820,12 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, #undef EMIT_SHIFT_TO_INIT #undef FROM_LOOP #undef TO_LOOP +#undef ONE_DIRECTION #undef SAVE_RESET_STATE #undef RESET_INPUT_BUFFER #undef FUNCTION_NAME #undef PREPARE_LOOP #undef END_LOOP -#undef ONE_DIRECTION +#undef EXTRA_LOOP_ARGS #undef STORE_REST +#undef FROM_ONEBYTE diff --git a/iconvdata/8bit-gap.c b/iconvdata/8bit-gap.c index 2ddc49e..9d48d17 100644 --- a/iconvdata/8bit-gap.c +++ b/iconvdata/8bit-gap.c @@ -32,6 +32,10 @@ struct gap /* Now we can include the tables. */ #include TABLES +#ifndef NONNUL +# define NONNUL(c) ((c) != '\0') +#endif + #define FROM_LOOP from_gap #define TO_LOOP to_gap @@ -49,7 +53,7 @@ struct gap { \ uint32_t ch = to_ucs4[*inptr]; \ \ - if (HAS_HOLES && __builtin_expect (ch, L'\1') == L'\0' && *inptr != '\0') \ + if (HAS_HOLES && __builtin_expect (ch == L'\0', 0) && NONNUL (*inptr)) \ { \ /* This is an illegal character. 
*/ \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ @@ -63,6 +67,15 @@ struct gap ++inptr; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + uint32_t ch = to_ucs4[c]; \ + \ + if (HAS_HOLES && __builtin_expect (ch == L'\0', 0) && NONNUL (c)) \ + return WEOF; \ + else \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/8bit-generic.c b/iconvdata/8bit-generic.c index 6ed5532..373289f 100644 --- a/iconvdata/8bit-generic.c +++ b/iconvdata/8bit-generic.c @@ -47,6 +47,15 @@ ++inptr; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + uint32_t ch = to_ucs4[c]; \ + \ + if (HAS_HOLES && __builtin_expect (ch == L'\0', 0) && c != '\0') \ + return WEOF; \ + else \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/ansi_x3.110.c b/iconvdata/ansi_x3.110.c index 08746cf..b20e12a 100644 --- a/iconvdata/ansi_x3.110.c +++ b/iconvdata/ansi_x3.110.c @@ -404,7 +404,7 @@ static const char from_ucs4[][2] = \ if (__builtin_expect (ch >= 0xc1, 0) && ch <= 0xcf) \ { \ - /* Composed character. First test whether the next character \ + /* Composed character. First test whether the next byte \ is also available. */ \ uint32_t ch2; \ \ @@ -449,6 +449,15 @@ static const char from_ucs4[][2] = inptr += incr; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + uint32_t ch = to_ucs4[c]; \ + \ + if (__builtin_expect (ch == 0, 0) && c != '\0') \ + return WEOF; \ + else \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/armscii-8.c b/iconvdata/armscii-8.c index b3db9a9..dbaf8be 100644 --- a/iconvdata/armscii-8.c +++ b/iconvdata/armscii-8.c @@ -71,6 +71,17 @@ static const uint16_t map_from_armscii_8[0xfe - 0xa2 + 1] = ++inptr; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c <= 0xa0) \ + /* Up to and including 0xa0 the ARMSCII-8 corresponds to Unicode. */ \ + return c; \ + else if (c >= 0xa2 && c <= 0xfe) \ + /* Use the table. 
*/ \ + return map_from_armscii_8[c - 0xa2]; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/big5.c b/iconvdata/big5.c index 22c3ab6..cdf8a76 100644 --- a/iconvdata/big5.c +++ b/iconvdata/big5.c @@ -8397,7 +8397,7 @@ static const char from_ucs4_tab15[][2] = \ if (ch >= 0xa1 && ch <= 0xf9) \ { \ - /* Two-byte character. First test whether the next character \ + /* Two-byte character. First test whether the next byte \ is also available. */ \ uint32_t ch2; \ int idx; \ @@ -8447,6 +8447,13 @@ static const char from_ucs4_tab15[][2] = outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c <= 0x80) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/big5hkscs.c b/iconvdata/big5hkscs.c index 0903968..b1aedd1 100644 --- a/iconvdata/big5hkscs.c +++ b/iconvdata/big5hkscs.c @@ -16817,7 +16817,7 @@ static struct \ if (ch >= 0x81 && ch <= 0xfe) \ { \ - /* Two-byte character. First test whether the next character \ + /* Two-byte character. First test whether the next byte \ is also available. 
*/ \ uint32_t ch2; \ int idx; \ @@ -16852,6 +16852,13 @@ static struct outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c <= 0x80) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/cp1255.c b/iconvdata/cp1255.c index 19edb27..6cccf52 100644 --- a/iconvdata/cp1255.c +++ b/iconvdata/cp1255.c @@ -321,6 +321,15 @@ static const struct { unsigned int idx; unsigned int len; } comp_table[8] = { } #define LOOP_NEED_FLAGS #define EXTRA_LOOP_DECLS , int *statep +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + return c; \ + uint32_t ch = to_ucs4[c - 0x80]; \ + if (ch == L'\0' || (ch >= 0x05d0 && ch <= 0x05f2)) \ + return WEOF; \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/cp1258.c b/iconvdata/cp1258.c index 853ef63..553d50b 100644 --- a/iconvdata/cp1258.c +++ b/iconvdata/cp1258.c @@ -480,6 +480,22 @@ static const struct } #define LOOP_NEED_FLAGS #define EXTRA_LOOP_DECLS , int *statep +#define ONEBYTE_BODY \ + { \ + uint32_t ch; \ + \ + if (c < 0x80) \ + ch = c; \ + else \ + { \ + ch = to_ucs4[c - 0x80]; \ + if (ch == L'\0') \ + return WEOF; \ + } \ + if (ch >= 0x0041 && ch <= 0x01b0) \ + return WEOF; \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/euc-cn.c b/iconvdata/euc-cn.c index c1bcadb..f912317 100644 --- a/iconvdata/euc-cn.c +++ b/iconvdata/euc-cn.c @@ -54,7 +54,7 @@ else \ { \ /* Two or more byte character. First test whether the \ - next character is also available. */ \ + next byte is also available. 
*/ \ const unsigned char *endp; \ \ if (__builtin_expect (inptr + 1 >= inend, 0)) \ @@ -88,6 +88,13 @@ outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/euc-jisx0213.c b/iconvdata/euc-jisx0213.c index 733cbc1..dc6ccfd 100644 --- a/iconvdata/euc-jisx0213.c +++ b/iconvdata/euc-jisx0213.c @@ -230,6 +230,13 @@ } #define LOOP_NEED_FLAGS #define EXTRA_LOOP_DECLS , int *statep +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/euc-jp.c b/iconvdata/euc-jp.c index 7faa080..9cb6a47 100644 --- a/iconvdata/euc-jp.c +++ b/iconvdata/euc-jp.c @@ -117,6 +117,13 @@ put32 (outptr, ch); \ outptr += 4; \ } +#define ONEBYTE_BODY \ + { \ + if (c < 0x8e || (c >= 0x90 && c <= 0x9f)) \ + return c; \ + else \ + return WEOF; \ + } #define LOOP_NEED_FLAGS #include <iconv/loop.c> diff --git a/iconvdata/euc-kr.c b/iconvdata/euc-kr.c index 37ed71f..7774720 100644 --- a/iconvdata/euc-kr.c +++ b/iconvdata/euc-kr.c @@ -90,7 +90,7 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp) } \ else \ { \ - /* Two-byte character. First test whether the next character \ + /* Two-byte character. First test whether the next byte \ is also available. 
*/ \ ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0x80); \ if (__builtin_expect (ch == 0, 0)) \ @@ -108,6 +108,13 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp) outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c <= 0x9f) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/euc-tw.c b/iconvdata/euc-tw.c index 8d3b1aa..6674dad 100644 --- a/iconvdata/euc-tw.c +++ b/iconvdata/euc-tw.c @@ -112,6 +112,13 @@ outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/gb18030.c b/iconvdata/gb18030.c index f6b3576..31f3c20 100644 --- a/iconvdata/gb18030.c +++ b/iconvdata/gb18030.c @@ -25772,6 +25772,13 @@ static const unsigned char __ucs_to_gb18030_tab2[8192][2] = *((uint32_t *) outptr)++ = ch; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/gbbig5.c b/iconvdata/gbbig5.c index 695ffb2..b2b4743 100644 --- a/iconvdata/gbbig5.c +++ b/iconvdata/gbbig5.c @@ -4802,7 +4802,7 @@ const char __from_big5_to_gb2312 [13973][2] = else if (ch >= 0xa1 && ch <= 0xf7) \ { \ /* Two-byte character. First test whether the \ - next character is also available. */ \ + next byte is also available. */ \ const char *cp; \ int idx; \ \ @@ -4891,7 +4891,7 @@ const char __from_big5_to_gb2312 [13973][2] = else if (ch >= 0xa1 && ch <= 0xf9) \ { \ /* Two byte character. First test whether the \ - next character is also available. */ \ + next byte is also available. */ \ const char *cp; \ int idx; \ \ diff --git a/iconvdata/gbk.c b/iconvdata/gbk.c index a738a9e..ad8a374 100644 --- a/iconvdata/gbk.c +++ b/iconvdata/gbk.c @@ -13154,7 +13154,7 @@ static const char __gbk_from_ucs4_tab12[][2] = else \ { \ /* Two or more byte character. First test whether the \ - next character is also available. 
*/ \ + next byte is also available. */ \ uint32_t ch2; \ int idx; \ \ @@ -13195,6 +13195,13 @@ static const char __gbk_from_ucs4_tab12[][2] = outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/ibm932.c b/iconvdata/ibm932.c index 1e87067..420b19a 100644 --- a/iconvdata/ibm932.c +++ b/iconvdata/ibm932.c @@ -20,13 +20,9 @@ #include <dlfcn.h> #include <stdint.h> +#include <stdbool.h> #include "ibm932.h" -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif - #define FROM 0 #define TO 1 @@ -50,38 +46,24 @@ #define LOOPFCT FROM_LOOP #define BODY \ { \ - const struct gap *rp1 = __ibm932sb_to_ucs4_idx; \ const struct gap *rp2 = __ibm932db_to_ucs4_idx; \ uint32_t ch = *inptr; \ uint32_t res; \ \ - if (__builtin_expect (ch >= 0xffff, 0)) \ - { \ - rp1 = NULL; \ - rp2 = NULL; \ - } \ - else if (__builtin_expect (ch, 0) == 0x80 \ - || __builtin_expect (ch, 0) == 0xa0 \ - || __builtin_expect (ch, 0) == 0xfd \ - || __builtin_expect (ch, 0) == 0xfe \ - || __builtin_expect (ch, 0) == 0xff) \ + if (__builtin_expect (ch == 0x80, 0) \ + || __builtin_expect (ch == 0xa0, 0) \ + || __builtin_expect (ch == 0xfd, 0) \ + || __builtin_expect (ch == 0xfe, 0) \ + || __builtin_expect (ch == 0xff, 0)) \ { \ /* This is an illegal character. */ \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ - else \ - { \ - while (ch > rp1->end) \ - ++rp1; \ - } \ \ /* Use the IBM932 table for single byte. */ \ - if (__builtin_expect (rp1 == NULL, 0) \ - || __builtin_expect (ch < rp1->start, 0) \ - || (res = __ibm932sb_to_ucs4[ch + rp1->idx], \ - __builtin_expect (res, '\1') == 0 && ch != 0)) \ + res = __ibm932sb_to_ucs4[ch]; \ + if (__builtin_expect (res == 0, 0) && ch != 0) \ { \ - \ /* Use the IBM932 table for double byte. 
*/ \ if (__builtin_expect (inptr + 1 >= inend, 0)) \ { \ @@ -128,6 +110,25 @@ } \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c == 0x80 || c == 0xa0 || c >= 0xfd) \ + return WEOF; \ + uint32_t res = __ibm932sb_to_ucs4[c]; \ + if (res == 0 && c != 0) \ + return WEOF; \ + if (res == 0x1c) \ + res = 0x1a; \ + else if (res == 0x7f) \ + res = 0x1c; \ + else if (res == 0xa5) \ + res = 0x5c; \ + else if (res == 0x203e) \ + res = 0x7e; \ + else if (res == 0x1a) \ + res = 0x7f; \ + return res; \ + } #include <iconv/loop.c> /* Next, define the other direction. */ @@ -140,7 +141,7 @@ const struct gap *rp = __ucs4_to_ibm932sb_idx; \ unsigned char sc; \ uint32_t ch = get32 (inptr); \ - uint16_t found = TRUE; \ + bool found = true; \ uint32_t i; \ uint32_t low; \ uint32_t high; \ @@ -163,7 +164,7 @@ { \ \ /* Use the UCS4 table for double byte. */ \ - found = FALSE; \ + found = false; \ low = 0; \ high = (sizeof (__ucs4_to_ibm932db) >> 1) \ / sizeof (__ucs4_to_ibm932db[0][FROM]); \ @@ -178,7 +179,7 @@ else \ { \ pccode = __ucs4_to_ibm932db[i][TO]; \ - found = TRUE; \ + found = true; \ break; \ } \ } \ diff --git a/iconvdata/ibm932.h b/iconvdata/ibm932.h index 500beb8..464d4ed 100644 --- a/iconvdata/ibm932.h +++ b/iconvdata/ibm932.h @@ -1,5 +1,5 @@ /* Tables for conversion from and to IBM932. - Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 2000-2002 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Masahide Washizawa <washi@jp.ibm.com>, 2000. 
@@ -31,12 +31,6 @@ struct gap int32_t idx; }; -static const struct gap __ibm932sb_to_ucs4_idx[] = -{ - { start: 0x0000, end: 0x00ff, idx: 0 }, - { start: 0xffff, end: 0xffff, idx: 0 } -}; - static const uint16_t __ibm932sb_to_ucs4[] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, diff --git a/iconvdata/ibm943.c b/iconvdata/ibm943.c index 818f682..bf387bb 100644 --- a/iconvdata/ibm943.c +++ b/iconvdata/ibm943.c @@ -20,13 +20,9 @@ #include <dlfcn.h> #include <stdint.h> +#include <stdbool.h> #include "ibm943.h" -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif - #define FROM 0 #define TO 1 @@ -50,38 +46,25 @@ #define LOOPFCT FROM_LOOP #define BODY \ { \ - const struct gap *rp1 = __ibm943sb_to_ucs4_idx; \ const struct gap *rp2 = __ibm943db_to_ucs4_idx; \ uint32_t ch = *inptr; \ uint32_t res; \ \ - if (__builtin_expect (ch >= 0xffff, 0)) \ - { \ - rp1 = NULL; \ - rp2 = NULL; \ - } \ - else if (__builtin_expect (ch, 0) == 0x80 \ - || __builtin_expect (ch, 0) == 0xa0 \ - || __builtin_expect (ch, 0) == 0xfd \ - || __builtin_expect (ch, 0) == 0xfe \ - || __builtin_expect (ch, 0) == 0xff) \ + if (__builtin_expect (ch == 0x80, 0) \ + || __builtin_expect (ch == 0xa0, 0) \ + || __builtin_expect (ch == 0xfd, 0) \ + || __builtin_expect (ch == 0xfe, 0) \ + || __builtin_expect (ch == 0xff, 0)) \ { \ /* This is an illegal character. */ \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ } \ - else \ - { \ - while (ch > rp1->end) \ - ++rp1; \ - } \ \ /* Use the IBM943 table for single byte. */ \ - if (__builtin_expect (rp1 == NULL, 0) \ - || __builtin_expect (ch < rp1->start, 0) \ - || (res = __ibm943sb_to_ucs4[ch + rp1->idx], \ - __builtin_expect (res, '\1') == 0 && ch != 0)) \ + if (__builtin_expect (ch > 0xdf, 0) \ + || (res = __ibm943sb_to_ucs4[ch], \ + __builtin_expect (res == 0, 0) && ch != 0)) \ { \ - \ /* Use the IBM943 table for double byte. 
*/ \ if (__builtin_expect (inptr + 1 >= inend, 0)) \ { \ @@ -128,6 +111,25 @@ } \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c == 0x80 || c == 0xa0 || c >= 0xe0) \ + return WEOF; \ + uint32_t res = __ibm943sb_to_ucs4[c]; \ + if (res == 0 && c != 0) \ + return WEOF; \ + if (res == 0x1c) \ + res = 0x1a; \ + else if (res == 0x7f) \ + res = 0x1c; \ + else if (res == 0xa5) \ + res = 0x5c; \ + else if (res == 0x203e) \ + res = 0x7e; \ + else if (res == 0x1a) \ + res = 0x7f; \ + return res; \ + } #include <iconv/loop.c> /* Next, define the other direction. */ @@ -140,7 +142,7 @@ const struct gap *rp = __ucs4_to_ibm943sb_idx; \ unsigned char sc; \ uint32_t ch = get32(inptr); \ - uint16_t found = TRUE; \ + bool found = true; \ uint32_t i; \ uint32_t low; \ uint32_t high; \ @@ -163,7 +165,7 @@ { \ \ /* Use the UCS4 table for double byte. */ \ - found = FALSE; \ + found = false; \ low = 0; \ high = (sizeof (__ucs4_to_ibm943db) >> 1) \ / sizeof (__ucs4_to_ibm943db[0][FROM]); \ @@ -178,7 +180,7 @@ else \ { \ pccode = __ucs4_to_ibm943db[i][TO]; \ - found = TRUE; \ + found = true; \ break; \ } \ } \ diff --git a/iconvdata/ibm943.h b/iconvdata/ibm943.h index 09a3c8d..b523672 100644 --- a/iconvdata/ibm943.h +++ b/iconvdata/ibm943.h @@ -1,5 +1,5 @@ /* Tables for conversion from and to IBM943. - Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 2000-2002 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Masahide Washizawa <washi@jp.ibm.com>, 2000. 
@@ -31,12 +31,6 @@ struct gap int32_t idx; }; -static const struct gap __ibm943sb_to_ucs4_idx[] = -{ - { start: 0x0000, end: 0x00df, idx: 0 }, - { start: 0xffff, end: 0xffff, idx: 0 } -}; - static const uint16_t __ibm943sb_to_ucs4[] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, diff --git a/iconvdata/isiri-3342.c b/iconvdata/isiri-3342.c index 8b72863..6cc5abb 100644 --- a/iconvdata/isiri-3342.c +++ b/iconvdata/isiri-3342.c @@ -1,5 +1,5 @@ /* Conversion from and to ISIRI-3342. - Copyright (C) 1999, 2000 Free Software Foundation, Inc. + Copyright (C) 1999, 2000, 2002 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. @@ -24,6 +24,9 @@ #define TABLES <isiri-3342.h> #define CHARSET_NAME "ISIRI-3342//" -#define HAS_HOLES (*inptr > 0x80) /* 0x80 really maps to 0x0000. */ +#define HAS_HOLES 1 + +/* 0x80 really maps to 0x0000. */ +#define NONNUL(c) ((c) != '\0' && (c) != 0x80) #include <8bit-gap.c> diff --git a/iconvdata/iso8859-1.c b/iconvdata/iso8859-1.c index dc39400..60ab29d 100644 --- a/iconvdata/iso8859-1.c +++ b/iconvdata/iso8859-1.c @@ -36,6 +36,10 @@ #define LOOPFCT FROM_LOOP #define BODY \ *((uint32_t *) outptr)++ = *inptr++; +#define ONEBYTE_BODY \ + { \ + return c; \ + } #include <iconv/loop.c> diff --git a/iconvdata/iso_6937-2.c b/iconvdata/iso_6937-2.c index a2b174e..b6bf2c0 100644 --- a/iconvdata/iso_6937-2.c +++ b/iconvdata/iso_6937-2.c @@ -402,7 +402,7 @@ static const char from_ucs4[][2] = \ if (__builtin_expect (ch >= 0xc1, 0) && ch <= 0xcf) \ { \ - /* Composed character. First test whether the next character \ + /* Composed character. First test whether the next byte \ is also available. 
*/ \ int ch2; \ \ @@ -449,6 +449,13 @@ static const char from_ucs4[][2] = outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + uint32_t ch = to_ucs4[c]; \ + if (ch == 0 && c != '\0') \ + return WEOF; \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/iso_6937.c b/iconvdata/iso_6937.c index b800a4f..16263ed 100644 --- a/iconvdata/iso_6937.c +++ b/iconvdata/iso_6937.c @@ -394,7 +394,7 @@ static const char from_ucs4[][2] = \ if (__builtin_expect (ch >= 0xc1, 0) && ch <= 0xcf) \ { \ - /* Composed character. First test whether the next character \ + /* Composed character. First test whether the next byte \ is also available. */ \ int ch2; \ \ @@ -441,6 +441,13 @@ static const char from_ucs4[][2] = outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + uint32_t ch = to_ucs4[c]; \ + if (ch == 0 && c != '\0') \ + return WEOF; \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/johab.c b/iconvdata/johab.c index a83483e..6d6f922 100644 --- a/iconvdata/johab.c +++ b/iconvdata/johab.c @@ -276,6 +276,13 @@ johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2) outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c <= 0x7f) \ + return (c == 0x5c ? 
0x20a9 : c); \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/shift_jisx0213.c b/iconvdata/shift_jisx0213.c index 119e001..82592ad 100644 --- a/iconvdata/shift_jisx0213.c +++ b/iconvdata/shift_jisx0213.c @@ -232,6 +232,20 @@ } #define LOOP_NEED_FLAGS #define EXTRA_LOOP_DECLS , int *statep +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + { \ + if (c == 0x5c) \ + return 0xa5; \ + if (c == 0x7e) \ + return 0x203e; \ + return c; \ + } \ + if (c >= 0xa1 && c <= 0xdf) \ + return 0xfec0 + c; \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/sjis.c b/iconvdata/sjis.c index 285f24f..4561779 100644 --- a/iconvdata/sjis.c +++ b/iconvdata/sjis.c @@ -4409,6 +4409,20 @@ static const char from_ucs4_extra[0x100][2] = outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + { \ + if (c == 0x5c) \ + return 0xa5; \ + if (c == 0x7e) \ + return 0x203e; \ + return c; \ + } \ + if (c >= 0xa1 && c <= 0xdf) \ + return 0xfec0 + c; \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/iconvdata/t.61.c b/iconvdata/t.61.c index 5227802..1e84fe9 100644 --- a/iconvdata/t.61.c +++ b/iconvdata/t.61.c @@ -387,7 +387,7 @@ static const char from_ucs4[][2] = \ if (__builtin_expect (ch >= 0xc1, 0) && ch <= 0xcf) \ { \ - /* Composed character. First test whether the next character \ + /* Composed character. First test whether the next byte \ is also available. 
*/ \ uint32_t ch2; \ \ @@ -427,6 +427,13 @@ static const char from_ucs4[][2] = inptr += increment; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + uint32_t ch = to_ucs4[c]; \ + if (ch == 0 && c != '\0') \ + return WEOF; \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/tcvn5712-1.c b/iconvdata/tcvn5712-1.c index 7cd6f01..8194ef0 100644 --- a/iconvdata/tcvn5712-1.c +++ b/iconvdata/tcvn5712-1.c @@ -474,6 +474,20 @@ static const struct ++inptr; \ } #define EXTRA_LOOP_DECLS , int *statep +#define ONEBYTE_BODY \ + { \ + uint32_t ch; \ + \ + if (c < 0x18) \ + ch = map_from_tcvn_low[c]; \ + else if (c >= 0x80) \ + ch = map_from_tcvn_high[c - 0x80]; \ + else \ + ch = c; \ + if (ch >= 0x0041 && ch <= 0x01b0) \ + return WEOF; \ + return ch; \ + } #include <iconv/loop.c> diff --git a/iconvdata/uhc.c b/iconvdata/uhc.c index 8724768..5d25b86 100644 --- a/iconvdata/uhc.c +++ b/iconvdata/uhc.c @@ -3073,7 +3073,7 @@ static const char uhc_hangul_from_ucs[11172][2] = } \ else \ { \ - /* Two-byte character. First test whether the next character \ + /* Two-byte character. First test whether the next byte \ is also available. */ \ uint32_t ch2; \ \ @@ -3147,6 +3147,13 @@ static const char uhc_hangul_from_ucs[11172][2] = outptr += 4; \ } #define LOOP_NEED_FLAGS +#define ONEBYTE_BODY \ + { \ + if (c < 0x80) \ + return c; \ + else \ + return WEOF; \ + } #include <iconv/loop.c> diff --git a/linuxthreads/ChangeLog b/linuxthreads/ChangeLog index 62dea84..53ef5c6 100644 --- a/linuxthreads/ChangeLog +++ b/linuxthreads/ChangeLog @@ -1,3 +1,9 @@ +2002-11-28 Ulrich Drepper <drepper@redhat.com> + + * sysdeps/unix/sysv/linux/bits/posix_opt.h: Define macros which + require it to 200112L. Remove _POSIX_POLL and _POSIX_SELECT. + * sysdeps/unix/sysv/linux/i386/bits/posix_opt.h: Likewise. 
+ 2002-11-19 Ulrich Drepper <drepper@redhat.com> * Versions (libc:GLIBC_2.0): Remove names of functions which are diff --git a/wcsmbs/btowc.c b/wcsmbs/btowc.c index ca75e28..1ba0221 100644 --- a/wcsmbs/btowc.c +++ b/wcsmbs/btowc.c @@ -30,12 +30,6 @@ wint_t __btowc (c) int c; { - wchar_t result; - struct __gconv_step_data data; - unsigned char inbuf[1]; - const unsigned char *inptr = inbuf; - size_t dummy; - int status; const struct gconv_fcts *fcts; /* If the parameter does not fit into one byte or it is the EOF value @@ -43,32 +37,51 @@ __btowc (c) if (c < SCHAR_MIN || c > UCHAR_MAX || c == EOF) return WEOF; - /* Tell where we want the result. */ - data.__outbuf = (unsigned char *) &result; - data.__outbufend = data.__outbuf + sizeof (wchar_t); - data.__invocation_counter = 0; - data.__internal_use = 1; - data.__flags = __GCONV_IS_LAST; - data.__statep = &data.__state; - data.__trans = NULL; - - /* Make sure we start in the initial state. */ - memset (&data.__state, '\0', sizeof (mbstate_t)); - /* Get the conversion functions. */ fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE)); - /* Create the input string. */ - inbuf[0] = c; + if (__builtin_expect (fcts->towc_nsteps == 1, 1) + && __builtin_expect (fcts->towc->__btowc_fct != NULL, 1)) + { + /* Use the shortcut function. */ + return DL_CALL_FCT (fcts->towc->__btowc_fct, + (fcts->towc, (unsigned char) c)); + } + else + { + /* Fall back to the slow but generic method. */ + wchar_t result; + struct __gconv_step_data data; + unsigned char inbuf[1]; + const unsigned char *inptr = inbuf; + size_t dummy; + int status; + + /* Tell where we want the result. */ + data.__outbuf = (unsigned char *) &result; + data.__outbufend = data.__outbuf + sizeof (wchar_t); + data.__invocation_counter = 0; + data.__internal_use = 1; + data.__flags = __GCONV_IS_LAST; + data.__statep = &data.__state; + data.__trans = NULL; + + /* Make sure we start in the initial state. 
*/ + memset (&data.__state, '\0', sizeof (mbstate_t)); + + /* Create the input string. */ + inbuf[0] = c; + + status = DL_CALL_FCT (fcts->towc->__fct, + (fcts->towc, &data, &inptr, inptr + 1, + NULL, &dummy, 0, 1)); - status = DL_CALL_FCT (fcts->towc->__fct, - (fcts->towc, &data, &inptr, inptr + 1, - NULL, &dummy, 0, 1)); - /* The conversion failed. */ - if (status != __GCONV_OK && status != __GCONV_FULL_OUTPUT - && status != __GCONV_EMPTY_INPUT) - result = WEOF; + if (status != __GCONV_OK && status != __GCONV_FULL_OUTPUT + && status != __GCONV_EMPTY_INPUT) + /* The conversion failed. */ + result = WEOF; - return result; + return result; + } } weak_alias (__btowc, btowc) diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c index bf23d25..2fea6c3 100644 --- a/wcsmbs/wcsmbsload.c +++ b/wcsmbs/wcsmbsload.c @@ -37,6 +37,7 @@ static struct __gconv_step to_wc = .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT", .__to_name = (char *) "INTERNAL", .__fct = __gconv_transform_ascii_internal, + .__btowc_fct = __gconv_btwoc_ascii, .__init_fct = NULL, .__end_fct = NULL, .__min_needed_from = 1, @@ -55,6 +56,7 @@ static struct __gconv_step to_mb = .__from_name = (char *) "INTERNAL", .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT", .__fct = __gconv_transform_internal_ascii, + .__btowc_fct = NULL, .__init_fct = NULL, .__end_fct = NULL, .__min_needed_from = 4, @@ -225,7 +227,8 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy) /* Copy the data. */ *copy = *orig; - /* Now increment the usage counters. */ + /* Now increment the usage counters. + Note: This assumes copy->towc_nsteps == 1 and copy->tomb_nsteps == 1. */ if (copy->towc->__shlib_handle != NULL) ++copy->towc->__counter; if (copy->tomb->__shlib_handle != NULL) |