From 04ea3b0fbb9ca56a04b437c57c2878842d331c77 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 29 Aug 2000 01:20:23 +0000 Subject: Update. 2000-08-27 Bruno Haible * string/strxfrm.c (strxfrm, wcsxfrm): Include . If nrules == 0 and srclen < n, copy only srclen + 1 characters. * sysdeps/generic/getdomain.c (getdomainname): Include . If the result fits in the buffer, copy only as many bytes as needed. * sysdeps/generic/_strerror.c (__strerror_r): Don't zero-fill the buffer after copying numbuf into it. * sysdeps/mach/_strerror.c (__strerror_r): Likewise. 2000-08-27 Bruno Haible * posix/confstr.c (confstr): When string_len > len, NUL-terminate the result. When string_len < len, don't clear the rest of the buffer. 2000-08-27 Bruno Haible Support for new LC_COLLATE format. * locale/coll-lookup.h: New file. * locale/weightwc.h (findidx): When size == 0, call collidx_table_lookup. * wcsmbs/wcscoll.c: Include coll-lookup.h. * wcsmbs/wcsxfrm.c: Likewise. * posix/fnmatch.c: Likewise. * posix/fnmatch_loop.c (internal_fnwmatch): When size == 0, call collseq_table_lookup. * locale/programs/3level.h: New file. * locale/programs/ld-ctype.c: (wcwidth_table, wctrans_table): Define by including "3level.h". * locale/programs/ld-collate.c (wchead_table, collidx_table, collseq_table): New types, defined by including "3level.h". (locale_collate_t): New wcheads_3level, wcseqorder_3level fields. (encoding_mask, encoding_byte): Remove. (utf8_encode): Use simple shifts instead. (collate_finish): When !oldstyle_tables, set plane_size and plane_cnt to 0, and initialize and fill wcheads_3level and wcseqorder_3level. (collate_output): New local variable tablewc_3level. When !oldstyle_tables, set table_size to 0 and names to NULL and fill tablewc_3level instead of tablewc. Change format of TABLEWC and COLLSEQWC entries written to the file. * locale/C-collate.c (collseqwc): Change format. (_nl_C_LC_COLLATE): Set HASH_SIZE and HASH_LAYERS to 0, change format of COLLSEQWC. 
* locale/Makefile (distribute): Add coll-lookup.h, programs/3level.h. 2000-08-27 Bruno Haible * locale/programs/ld-ctype.c (MAX_CHARNAMES_IDX): New macro. (locale_ctype_t): New charnames_idx field. (ctype_startup): Initialize charnames_idx field. (find_idx): Speed up dramatically by using charnames_idx inverse table. 2000-08-27 Bruno Haible * locale/C-ctype.c: Switch to new locale format. (_nl_C_LC_CTYPE_names): Remove array. (STRUCT_CTYPE_CLASS): New macro. (_nl_C_LC_CTYPE_class_{upper,lower,alpha,digit,xdigit,space,print, graph,blank,cntrl,punct,alnum}, _nl_C_LC_CTYPE_map_{toupper,tolower}): New three-level tables. (_nl_C_LC_CTYPE_width): Change from array to three-level table. (_nl_C_LC_CTYPE): Fix nstrings value. Set HASH_SIZE and HASH_LAYERS to 0. Change WIDTH format. Set CLASS_OFFSET and MAP_OFFSET. Add 12 class tables and 2 map tables at the end. * ctype/ctype-info.c (_nl_C_LC_CTYPE_names): Remove declaration. (_nl_C_LC_CTYPE_class_{upper,lower,alpha,digit,xdigit,space,print, graph,blank,cntrl,punct,alnum}, _nl_C_LC_CTYPE_map_{toupper,tolower}): New declarations. (b): Remove trailing semicolon. (__ctype_names, __ctype_width): Don't initialize. (__ctype32_wctype, __ctype32_wctrans, __ctype32_width): Initialize. 2000-08-27 Bruno Haible * elf/dl-load.c (open_path): Add an argument telling whether *dirsp is guaranteed to be allocated with the same malloc() and may be passed to free(). (_dl_map_object): Update open_path calls. If rtld_search_dirs has been set to empty by an earlier open_path call, don't pass it again. 
--- locale/C-collate.c | 26 +- locale/C-ctype.c | 357 ++++++++++++++++++++----- locale/Makefile | 5 +- locale/coll-lookup.h | 101 ++++++++ locale/programs/3level.h | 321 +++++++++++++++++++++++ locale/programs/ld-collate.c | 602 +++++++++++++++++++++++++------------------ locale/programs/ld-ctype.c | 507 +++--------------------------------- locale/weightwc.h | 26 +- 8 files changed, 1155 insertions(+), 790 deletions(-) create mode 100644 locale/coll-lookup.h create mode 100644 locale/programs/3level.h (limited to 'locale') diff --git a/locale/C-collate.c b/locale/C-collate.c index f8d1430..a0ba0ff 100644 --- a/locale/C-collate.c +++ b/locale/C-collate.c @@ -58,6 +58,12 @@ static const char collseqmb[] = static const uint32_t collseqwc[] = { + 8, 1, 8, 0x0, 0xff, + /* 1st-level table */ + 6 * sizeof (uint32_t), + /* 2nd-level table */ + 7 * sizeof (uint32_t), + /* 3rd-level table */ L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07', L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f', L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17', @@ -101,23 +107,41 @@ const struct locale_data _nl_C_LC_COLLATE = NULL, 18, { + /* _NL_COLLATE_NRULES */ { word: 0 }, + /* _NL_COLLATE_RULESETS */ { string: NULL }, + /* _NL_COLLATE_TABLEMB */ { string: NULL }, + /* _NL_COLLATE_WEIGHTMB */ { string: NULL }, + /* _NL_COLLATE_EXTRAMB */ { string: NULL }, + /* _NL_COLLATE_INDIRECTMB */ { string: NULL }, + /* _NL_COLLATE_HASH_SIZE */ { word: 0 }, + /* _NL_COLLATE_HASH_LAYERS */ { word: 0 }, + /* _NL_COLLATE_NAMES */ { string: NULL }, + /* _NL_COLLATE_TABLEWC */ { string: NULL }, + /* _NL_COLLATE_WEIGHTWC */ { string: NULL }, + /* _NL_COLLATE_EXTRAWC */ { string: NULL }, + /* _NL_COLLATE_INDIRECTWC */ { string: NULL }, + /* _NL_COLLATE_SYMB_HASH_SIZEMB */ { string: NULL }, + /* _NL_COLLATE_SYMB_TABLEMB */ { string: NULL }, + /* _NL_COLLATE_SYMB_EXTRAMB */ { string: NULL }, + /* _NL_COLLATE_COLLSEQMB */ { string: collseqmb }, - { 
wstr: collseqwc } + /* _NL_COLLATE_COLLSEQWC */ + { string: (const char *) collseqwc } } }; diff --git a/locale/C-ctype.c b/locale/C-ctype.c index 23420c1..4f8e204 100644 --- a/locale/C-ctype.c +++ b/locale/C-ctype.c @@ -286,59 +286,248 @@ const uint32_t _nl_C_LC_CTYPE_tolower[384] = /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; -const uint32_t _nl_C_LC_CTYPE_names[256] = + +#define STRUCT_CTYPE_CLASS(p, q) \ + struct \ + { \ + uint32_t isctype_data[8]; \ + uint32_t header[5]; \ + uint32_t level1[1]; \ + uint32_t level2[1 << q]; \ + uint32_t level3[1 << p]; \ + } + +const STRUCT_CTYPE_CLASS(1, 1) _nl_C_LC_CTYPE_class_upper = { - /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - /* 0x08 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - /* 0x18 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - /* 0x28 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - /* 0x38 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - /* 0x48 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - /* 0x58 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - /* 0x68 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - /* 0x78 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - /* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - /* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - /* 0xa8 
*/ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - /* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, - /* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - /* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, - /* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, - /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff + { 0x00000000, 0x00000000, 0x07fffffe, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x07fffffe, 0x00000000 } +}; +const STRUCT_CTYPE_CLASS(1, 1) _nl_C_LC_CTYPE_class_lower = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x07fffffe } +}; +const STRUCT_CTYPE_CLASS(1, 1) _nl_C_LC_CTYPE_class_alpha = +{ + { 0x00000000, 0x00000000, 0x07fffffe, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x07fffffe, 0x07fffffe } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_digit = +{ + { 0x00000000, 0x03ff0000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 
0x00000000, 0x03ff0000 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_xdigit = +{ + { 0x00000000, 0x03ff0000, 0x0000007e, 0x0000007e, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x03ff0000, 0x0000007e, 0x0000007e } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_space = +{ + { 0x00003e00, 0x00000001, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00003e00, 0x00000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_print = +{ + { 0x00000000, 0xffffffff, 0xffffffff, 0x7fffffff, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xffffffff, 0xffffffff, 0x7fffffff } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_graph = +{ + { 0x00000000, 0xfffffffe, 0xffffffff, 0x7fffffff, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xfffffffe, 0xffffffff, 0x7fffffff } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_blank = +{ + { 0x00000200, 0x00000001, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000200, 0x00000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_cntrl = +{ + { 0xffffffff, 0x00000000, 0x00000000, 0x80000000, + 
0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0xffffffff, 0x00000000, 0x00000000, 0x80000000 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_punct = +{ + { 0x00000000, 0xfc00fffe, 0xf8000001, 0x78000001, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xfc00fffe, 0xf8000001, 0x78000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_alnum = +{ + { 0x00000000, 0x03ff0000, 0x07fffffe, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x03ff0000, 0x07fffffe, 0x07fffffe } }; -const char _nl_C_LC_CTYPE_width[256] = - /* 0x00 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x10 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x20 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x30 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x40 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x50 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x60 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x70 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x80 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x90 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xa0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xb0 */ 
"\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xc0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xd0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xe0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xf0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" -; + +const struct +{ + uint32_t header[5]; + uint32_t level1[1]; + uint32_t level2[4]; + int32_t level3[32]; +} +_nl_C_LC_CTYPE_map_toupper = +{ + { 7, 1, 5, 3, 31 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 0, 0, 10 * sizeof (uint32_t) }, + /* 3rd-level table */ + { + 0x00000000, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + } +}, +_nl_C_LC_CTYPE_map_tolower = +{ + { 7, 1, 5, 3, 31 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 0, 10 * sizeof (uint32_t), 0 }, + /* 3rd-level table */ + { + 0x00000000, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + } +}; + +const struct +{ + uint32_t header[5]; + uint32_t level1[1]; + uint32_t level2[1]; + uint8_t level3[1]; +} +_nl_C_LC_CTYPE_width = +{ + { 7, 1, 0, 0, 0 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ 
+ { 1 } +}; + +/* Number of fields with fixed meanings, starting at 0. */ +#define NR_FIXED 70 +/* Number of class fields, starting at CLASS_OFFSET. */ +#define NR_CLASSES 12 +/* Number of map fields, starting at MAP_OFFSET. */ +#define NR_MAPS 2 + +/* Compile time verification of + NR_FIXED == _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1). */ +typedef int assertion1[1 - 2 * (NR_FIXED != _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))]; const struct locale_data _nl_C_LC_CTYPE = { @@ -347,18 +536,29 @@ const struct locale_data _nl_C_LC_CTYPE = UNDELETABLE, 1, /* Enable transliteration by default. */ NULL, - 66, + NR_FIXED + NR_CLASSES + NR_MAPS, { + /* _NL_CTYPE_CLASS */ { string: _nl_C_LC_CTYPE_class }, + /* _NL_CTYPE_TOUPPER */ { string: (const char *) _nl_C_LC_CTYPE_toupper }, + /* _NL_CTYPE_GAP1 */ { string: NULL }, + /* _NL_CTYPE_TOLOWER */ { string: (const char *) _nl_C_LC_CTYPE_tolower }, + /* _NL_CTYPE_GAP2 */ { string: NULL }, + /* _NL_CTYPE_CLASS32 */ { string: _nl_C_LC_CTYPE_class32 }, - { string: (const char *) _nl_C_LC_CTYPE_names }, + /* _NL_CTYPE_NAMES */ { string: NULL }, - { word: 256 }, - { word: 1 }, + /* _NL_CTYPE_GAP3 */ + { string: NULL }, + /* _NL_CTYPE_HASH_SIZE */ + { word: 0 }, + /* _NL_CTYPE_HASH_LAYERS */ + { word: 0 }, + /* _NL_CTYPE_CLASS_NAMES */ { string: "upper\0" "lower\0" "alpha\0" "digit\0" "xdigit\0" "space\0" "print\0" "graph\0" "blank\0" "cntrl\0" "punct\0" "alnum\0" #ifdef PREDEFINED_CLASSES @@ -370,19 +570,29 @@ const struct locale_data _nl_C_LC_CTYPE = "vowel_connect\0" #endif }, + /* _NL_CTYPE_MAP_NAMES */ { string: "toupper\0" "tolower\0" #ifdef PREDEFINED_CLASSES "tosymmetric\0" #endif }, - { string: _nl_C_LC_CTYPE_width }, + /* _NL_CTYPE_WIDTH */ + { string: (const char *) _nl_C_LC_CTYPE_width.header }, + /* _NL_CTYPE_MB_CUR_MAX */ { word: 1 }, + /* _NL_CTYPE_CODESET_NAME */ { string: "ANSI_X3.4-1968" }, + /* _NL_CTYPE_TOUPPER32 */ { string: (const char *) &_nl_C_LC_CTYPE_toupper[128] }, + /* _NL_CTYPE_TOLOWER32 */ { string: (const 
char *) &_nl_C_LC_CTYPE_tolower[128] }, - { word: 0 }, - { word: 0 }, + /* _NL_CTYPE_CLASS_OFFSET */ + { word: NR_FIXED }, + /* _NL_CTYPE_MAP_OFFSET */ + { word: NR_FIXED + NR_CLASSES }, + /* _NL_CTYPE_INDIGITS_MB_LEN */ { word: 1 }, + /* _NL_CTYPE_INDIGITS0_MB .. _NL_CTYPE_INDIGITS9_MB */ { string: "0" }, { string: "1" }, { string: "2" }, @@ -393,7 +603,9 @@ const struct locale_data _nl_C_LC_CTYPE = { string: "7" }, { string: "8" }, { string: "9" }, + /* _NL_CTYPE_INDIGITS_WC_LEN */ { word: 1 }, + /* _NL_CTYPE_INDIGITS0_WC .. _NL_CTYPE_INDIGITS9_WC */ { wstr: (uint32_t *) L"0" }, { wstr: (uint32_t *) L"1" }, { wstr: (uint32_t *) L"2" }, @@ -404,6 +616,7 @@ const struct locale_data _nl_C_LC_CTYPE = { wstr: (uint32_t *) L"7" }, { wstr: (uint32_t *) L"8" }, { wstr: (uint32_t *) L"9" }, + /* _NL_CTYPE_OUTDIGIT0_MB .. _NL_CTYPE_OUTDIGIT9_MB */ { string: "0" }, { string: "1" }, { string: "2" }, @@ -414,6 +627,7 @@ const struct locale_data _nl_C_LC_CTYPE = { string: "7" }, { string: "8" }, { string: "9" }, + /* _NL_CTYPE_OUTDIGIT0_WC .. _NL_CTYPE_OUTDIGIT9_WC */ { word: L'0' }, { word: L'1' }, { word: L'2' }, @@ -424,14 +638,39 @@ const struct locale_data _nl_C_LC_CTYPE = { word: L'7' }, { word: L'8' }, { word: L'9' }, + /* _NL_CTYPE_TRANSLIT_TAB_SIZE */ { word: NTRANSLIT }, + /* _NL_CTYPE_TRANSLIT_FROM_IDX */ { wstr: translit_from_idx }, + /* _NL_CTYPE_TRANSLIT_FROM_TBL */ { wstr: (uint32_t *) translit_from_tbl }, + /* _NL_CTYPE_TRANSLIT_TO_IDX */ { wstr: translit_to_idx }, + /* _NL_CTYPE_TRANSLIT_TO_TBL */ { wstr: (uint32_t *) translit_to_tbl }, + /* _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN */ { word: 1 }, + /* _NL_CTYPE_TRANSLIT_DEFAULT_MISSING */ { wstr: (uint32_t *) L"?" 
}, + /* _NL_CTYPE_TRANSLIT_IGNORE_LEN */ { word: 0 }, - { wstr: NULL } + /* _NL_CTYPE_TRANSLIT_IGNORE */ + { wstr: NULL }, + /* NR_CLASSES wctype_tables */ + { string: (const char *) _nl_C_LC_CTYPE_class_upper.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_lower.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_alpha.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_digit.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_xdigit.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_space.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_print.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_graph.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_blank.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_cntrl.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_punct.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_alnum.header }, + /* NR_MAPS wctrans_tables */ + { string: (const char *) _nl_C_LC_CTYPE_map_toupper.header }, + { string: (const char *) _nl_C_LC_CTYPE_map_tolower.header } } }; diff --git a/locale/Makefile b/locale/Makefile index 4bcdecb..ce9747b 100644 --- a/locale/Makefile +++ b/locale/Makefile @@ -25,14 +25,15 @@ headers = locale.h langinfo.h xlocale.h distribute = localeinfo.h categories.def iso-639.def iso-3166.def \ iso-4217.def weight.h weightwc.h strlen-hash.h elem-hash.h \ indigits.h indigitswc.h outdigits.h outdigitswc.h \ - C-translit.h.in C-translit.h gen-translit.pl \ + coll-lookup.h C-translit.h.in C-translit.h gen-translit.pl \ $(addprefix programs/, \ locale.c localedef.c \ $(localedef-modules:=.c) $(locale-modules:=.c) \ $(lib-modules:=.c) config.h simple-hash.h \ charmap-kw.gperf charmap-kw.h locfile-token.h \ locfile-kw.gperf locfile-kw.h linereader.h \ - locfile.h charmap.h repertoire.h localedef.h) + locfile.h charmap.h repertoire.h localedef.h \ + 3level.h) routines = setlocale findlocale loadlocale localeconv nl_langinfo \ mb_cur_max codeset_name \ 
newlocale duplocale freelocale diff --git a/locale/coll-lookup.h b/locale/coll-lookup.h new file mode 100644 index 0000000..ad0ff66 --- /dev/null +++ b/locale/coll-lookup.h @@ -0,0 +1,101 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible , 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Word tables are accessed by cutting wc in three blocks of bits: + - the high 32-q-p bits, + - the next q bits, + - the next p bits. + + +------------------+-----+-----+ + wc = + 32-q-p | q | p | + +------------------+-----+-----+ + + p and q are variable. For 16-bit Unicode it is sufficient to + choose p and q such that q+p <= 16. + + The table contains the following uint32_t words: + - q+p, + - s = upper exclusive bound for wc >> (q+p), + - p, + - 2^q-1, + - 2^p-1, + - 1st-level table: s offsets, pointing into the 2nd-level table, + - 2nd-level table: k*2^q offsets, pointing into the 3rd-level table, + - 3rd-level table: j*2^p words, each containing 32 bits of data. +*/ + +#include + +/* Lookup in a table of int32_t, with default value 0. 
*/ +static inline int32_t +collidx_table_lookup (const char *table, uint32_t wc) +{ + uint32_t shift1 = ((const uint32_t *) table)[0]; + uint32_t index1 = wc >> shift1; + uint32_t bound = ((const uint32_t *) table)[1]; + if (index1 < bound) + { + uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; + if (lookup1 != 0) + { + uint32_t shift2 = ((const uint32_t *) table)[2]; + uint32_t mask2 = ((const uint32_t *) table)[3]; + uint32_t index2 = (wc >> shift2) & mask2; + uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; + if (lookup2 != 0) + { + uint32_t mask3 = ((const uint32_t *) table)[4]; + uint32_t index3 = wc & mask3; + int32_t lookup3 = ((const int32_t *)(table + lookup2))[index3]; + + return lookup3; + } + } + } + return 0; +} + +/* Lookup in a table of uint32_t, with default value 0xffffffff. */ +static inline uint32_t +collseq_table_lookup (const char *table, uint32_t wc) +{ + uint32_t shift1 = ((const uint32_t *) table)[0]; + uint32_t index1 = wc >> shift1; + uint32_t bound = ((const uint32_t *) table)[1]; + if (index1 < bound) + { + uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; + if (lookup1 != 0) + { + uint32_t shift2 = ((const uint32_t *) table)[2]; + uint32_t mask2 = ((const uint32_t *) table)[3]; + uint32_t index2 = (wc >> shift2) & mask2; + uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; + if (lookup2 != 0) + { + uint32_t mask3 = ((const uint32_t *) table)[4]; + uint32_t index3 = wc & mask3; + uint32_t lookup3 = ((const uint32_t *)(table + lookup2))[index3]; + + return lookup3; + } + } + } + return ~((uint32_t) 0); +} diff --git a/locale/programs/3level.h b/locale/programs/3level.h new file mode 100644 index 0000000..d829332 --- /dev/null +++ b/locale/programs/3level.h @@ -0,0 +1,321 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible , 2000. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Construction of sparse 3-level tables. + See wchar-lookup.h or coll-lookup.h for their structure and the + meaning of p and q. + + Before including this file, set + TABLE to the name of the structure to be defined + ELEMENT to the type of every entry + DEFAULT to the default value for empty entries + ITERATE if you want the TABLE_iterate function to be defined + NO_FINALIZE if you don't want the TABLE_finalize function to be defined + + This will define + + struct TABLE; + void TABLE_init (struct TABLE *t); + ELEMENT TABLE_get (struct TABLE *t, uint32_t wc); + void TABLE_add (struct TABLE *t, uint32_t wc, ELEMENT value); + void TABLE_iterate (struct TABLE *t, + void (*fn) (uint32_t wc, ELEMENT value)); + void TABLE_finalize (struct TABLE *t); +*/ + +#define CONCAT(a,b) CONCAT1(a,b) +#define CONCAT1(a,b) a##b + +struct TABLE +{ + /* Parameters. */ + unsigned int p; + unsigned int q; + /* Working representation. */ + size_t level1_alloc; + size_t level1_size; + uint32_t *level1; + size_t level2_alloc; + size_t level2_size; + uint32_t *level2; + size_t level3_alloc; + size_t level3_size; + ELEMENT *level3; + /* Compressed representation. */ + size_t result_size; + char *result; +}; + +/* Initialize. 
Assumes t->p and t->q have already been set. */ +static inline void +CONCAT(TABLE,_init) (struct TABLE *t) +{ + t->level1_alloc = t->level1_size = 0; + t->level2_alloc = t->level2_size = 0; + t->level3_alloc = t->level3_size = 0; +} + +/* Retrieve an entry. */ +static inline ELEMENT +CONCAT(TABLE,_get) (struct TABLE *t, uint32_t wc) +{ + uint32_t index1 = wc >> (t->q + t->p); + if (index1 < t->level1_size) + { + uint32_t lookup1 = t->level1[index1]; + if (lookup1 != ~((uint32_t) 0)) + { + uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1)) + + (lookup1 << t->q); + uint32_t lookup2 = t->level2[index2]; + if (lookup2 != ~((uint32_t) 0)) + { + uint32_t index3 = (wc & ((1 << t->p) - 1)) + + (lookup2 << t->p); + ELEMENT lookup3 = t->level3[index3]; + + return lookup3; + } + } + } + return DEFAULT; +} + +/* Add one entry. */ +static void +CONCAT(TABLE,_add) (struct TABLE *t, uint32_t wc, ELEMENT value) +{ + uint32_t index1 = wc >> (t->q + t->p); + uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1); + uint32_t index3 = wc & ((1 << t->p) - 1); + size_t i, i1, i2; + + if (value == CONCAT(TABLE,_get) (t, wc)) + return; + + if (index1 >= t->level1_size) + { + if (index1 >= t->level1_alloc) + { + size_t alloc = 2 * t->level1_alloc; + if (alloc <= index1) + alloc = index1 + 1; + t->level1 = (t->level1_alloc > 0 + ? (uint32_t *) xrealloc ((char *) t->level1, + alloc * sizeof (uint32_t)) + : (uint32_t *) xmalloc (alloc * sizeof (uint32_t))); + t->level1_alloc = alloc; + } + while (index1 >= t->level1_size) + t->level1[t->level1_size++] = ~((uint32_t) 0); + } + + if (t->level1[index1] == ~((uint32_t) 0)) + { + if (t->level2_size == t->level2_alloc) + { + size_t alloc = 2 * t->level2_alloc + 1; + t->level2 = (t->level2_alloc > 0 + ? 
(uint32_t *) xrealloc ((char *) t->level2, + (alloc << t->q) * sizeof (uint32_t)) + : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t))); + t->level2_alloc = alloc; + } + i1 = t->level2_size << t->q; + i2 = (t->level2_size + 1) << t->q; + for (i = i1; i < i2; i++) + t->level2[i] = ~((uint32_t) 0); + t->level1[index1] = t->level2_size++; + } + + index2 += t->level1[index1] << t->q; + + if (t->level2[index2] == ~((uint32_t) 0)) + { + if (t->level3_size == t->level3_alloc) + { + size_t alloc = 2 * t->level3_alloc + 1; + t->level3 = (t->level3_alloc > 0 + ? (ELEMENT *) xrealloc ((char *) t->level3, + (alloc << t->p) * sizeof (ELEMENT)) + : (ELEMENT *) xmalloc ((alloc << t->p) * sizeof (ELEMENT))); + t->level3_alloc = alloc; + } + i1 = t->level3_size << t->p; + i2 = (t->level3_size + 1) << t->p; + for (i = i1; i < i2; i++) + t->level3[i] = DEFAULT; + t->level2[index2] = t->level3_size++; + } + + index3 += t->level2[index2] << t->p; + + t->level3[index3] = value; +} + +#ifdef ITERATE +/* Apply a function to all entries in the table. */ +static void +CONCAT(TABLE,_iterate) (struct TABLE *t, + void (*fn) (uint32_t wc, ELEMENT value)) +{ + uint32_t index1; + for (index1 = 0; index1 < t->level1_size; index1++) + { + uint32_t lookup1 = t->level1[index1]; + if (lookup1 != ~((uint32_t) 0)) + { + uint32_t lookup1_shifted = lookup1 << t->q; + uint32_t index2; + for (index2 = 0; index2 < (1 << t->q); index2++) + { + uint32_t lookup2 = t->level2[index2 + lookup1_shifted]; + if (lookup2 != ~((uint32_t) 0)) + { + uint32_t lookup2_shifted = lookup2 << t->p; + uint32_t index3; + for (index3 = 0; index3 < (1 << t->p); index3++) + { + ELEMENT lookup3 = t->level3[index3 + lookup2_shifted]; + if (lookup3 != DEFAULT) + fn ((((index1 << t->q) + index2) << t->p) + index3, + lookup3); + } + } + } + } + } +} +#endif + +#ifndef NO_FINALIZE +/* Finalize and shrink. 
*/ +static void +CONCAT(TABLE,_finalize) (struct TABLE *t) +{ + size_t i, j, k; + uint32_t reorder3[t->level3_size]; + uint32_t reorder2[t->level2_size]; + uint32_t level1_offset, level2_offset, level3_offset, last_offset; + + /* Uniquify level3 blocks. */ + k = 0; + for (j = 0; j < t->level3_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (ELEMENT)) == 0) + break; + /* Relocate block j to block i. */ + reorder3[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (ELEMENT)); + k++; + } + } + t->level3_size = k; + + for (i = 0; i < (t->level2_size << t->q); i++) + if (t->level2[i] != ~((uint32_t) 0)) + t->level2[i] = reorder3[t->level2[i]]; + + /* Uniquify level2 blocks. */ + k = 0; + for (j = 0; j < t->level2_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)) == 0) + break; + /* Relocate block j to block i. */ + reorder2[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)); + k++; + } + } + t->level2_size = k; + + for (i = 0; i < t->level1_size; i++) + if (t->level1[i] != ~((uint32_t) 0)) + t->level1[i] = reorder2[t->level1[i]]; + + /* Create and fill the resulting compressed representation. 
*/ + last_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t) + + (t->level3_size << t->p) * sizeof (ELEMENT); + t->result_size = (last_offset + 3) & ~3ul; + t->result = (char *) xmalloc (t->result_size); + + level1_offset = + 5 * sizeof (uint32_t); + level2_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t); + level3_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t); + + ((uint32_t *) t->result)[0] = t->q + t->p; + ((uint32_t *) t->result)[1] = t->level1_size; + ((uint32_t *) t->result)[2] = t->p; + ((uint32_t *) t->result)[3] = (1 << t->q) - 1; + ((uint32_t *) t->result)[4] = (1 << t->p) - 1; + + for (i = 0; i < t->level1_size; i++) + ((uint32_t *) (t->result + level1_offset))[i] = + (t->level1[i] == ~((uint32_t) 0) + ? 0 + : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); + + for (i = 0; i < (t->level2_size << t->q); i++) + ((uint32_t *) (t->result + level2_offset))[i] = + (t->level2[i] == ~((uint32_t) 0) + ? 0 + : (t->level2[i] << t->p) * sizeof (ELEMENT) + level3_offset); + + for (i = 0; i < (t->level3_size << t->p); i++) + ((ELEMENT *) (t->result + level3_offset))[i] = t->level3[i]; + + if (last_offset < t->result_size) + memset (t->result + last_offset, 0, t->result_size - last_offset); + + if (t->level1_alloc > 0) + free (t->level1); + if (t->level2_alloc > 0) + free (t->level2); + if (t->level3_alloc > 0) + free (t->level3); +} +#endif + +#undef TABLE +#undef ELEMENT +#undef DEFAULT +#undef ITERATE +#undef NO_FINALIZE diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c index 96ae542..6513d89 100644 --- a/locale/programs/ld-collate.c +++ b/locale/programs/ld-collate.c @@ -139,6 +139,26 @@ struct symbol_t size_t line; }; +/* Sparse table of struct element_t *. 
*/ +#define TABLE wchead_table +#define ELEMENT struct element_t * +#define DEFAULT NULL +#define ITERATE +#define NO_FINALIZE +#include "3level.h" + +/* Sparse table of int32_t. */ +#define TABLE collidx_table +#define ELEMENT int32_t +#define DEFAULT 0 +#include "3level.h" + +/* Sparse table of uint32_t. */ +#define TABLE collseq_table +#define ELEMENT uint32_t +#define DEFAULT ~((uint32_t) 0) +#include "3level.h" + /* The real definition of the struct for the LC_COLLATE locale. */ struct locale_collate_t @@ -199,10 +219,12 @@ struct locale_collate_t /* Arrays with heads of the list for each of the leading bytes in the multibyte sequences. */ struct element_t **wcheads; + struct wchead_table wcheads_3level; /* The arrays with the collation sequence order. */ unsigned char mbseqorder[256]; uint32_t *wcseqorder; + struct collseq_table wcseqorder_3level; }; @@ -211,19 +233,6 @@ struct locale_collate_t static uint32_t nrules; -/* These are definitions used by some of the functions for handling - UTF-8 encoding below. */ -static const uint32_t encoding_mask[] = -{ - ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; - - /* We need UTF-8 encoding of numbers. */ static inline int utf8_encode (char *buf, int val) @@ -240,11 +249,11 @@ utf8_encode (char *buf, int val) int step; for (step = 2; step < 6; ++step) - if ((val & encoding_mask[step - 2]) == 0) + if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0) break; retval = step; - *buf = encoding_byte[step - 2]; + *buf = (unsigned char) (~0xff >> step); --step; do { @@ -1635,109 +1644,126 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap) collate->mbheads[i] = &collate->undefined; } - /* Now to the wide character case. Here we have to find first a good - mapping function to get the wide range of wide character values - (0x00000000 to 0x7fffffff) to a managable table. This might take - some time so we issue a warning. 
- - We use a very trivial hashing function to store the sparse - table. CH % TABSIZE is used as an index. To solve multiple hits - we have N planes. This guarantees a fixed search time for a - character [N / 2]. In the following code we determine the minimum - value for TABSIZE * N, where TABSIZE >= 256. - - Some people complained that this algorithm takes too long. Well, - go on, improve it. But changing the step size is *not* an - option. Some people changed this to use only sizes of prime - numbers. Think again, do some math. We are looking for the - optimal solution, not something which works in general. Unless - somebody can provide a dynamic programming solution I think this - implementation is as good as it can get. */ - if (nr_wide_elems > 512 && !be_quiet) - fputs (_("\ + /* Now to the wide character case. */ + if (oldstyle_tables) + { + /* Here we first have to find a good mapping function to get the + wide range of wide character values (0x00000000 to 0x7fffffff) + to a manageable table. This might take some time so we issue + a warning. + + We use a very trivial hashing function to store the sparse + table. CH % TABSIZE is used as an index. To solve multiple hits + we have N planes. This guarantees a fixed search time for a + character [N / 2]. In the following code we determine the minimum + value for TABSIZE * N, where TABSIZE >= 256. + + Some people complained that this algorithm takes too long. Well, + go on, improve it. But changing the step size is *not* an + option. Some people changed this to use only sizes of prime + numbers. Think again, do some math. We are looking for the + optimal solution, not something which works in general. Unless + somebody can provide a dynamic programming solution I think this + implementation is as good as it can get. 
*/ + if (nr_wide_elems > 512 && !be_quiet) + fputs (_("\ Computing table size for collation table might take a while..."), - stderr); + stderr); - min_total = UINT_MAX; - act_size = 256; + min_total = UINT_MAX; + act_size = 256; - /* While we want to have a small total size we are willing to use a - little bit larger table if this reduces the number of layers. - Therefore we add a little penalty to the number of planes. - Maybe this constant has to be adjusted a bit. */ + /* While we want to have a small total size we are willing to use a + little bit larger table if this reduces the number of layers. + Therefore we add a little penalty to the number of planes. + Maybe this constant has to be adjusted a bit. */ #define PENALTY 128 - do - { - size_t cnt[act_size]; - struct element_t *elem[act_size]; - size_t act_planes = 1; + do + { + size_t cnt[act_size]; + struct element_t *elem[act_size]; + size_t act_planes = 1; - memset (cnt, '\0', sizeof cnt); - memset (elem, '\0', sizeof elem); + memset (cnt, '\0', sizeof cnt); + memset (elem, '\0', sizeof elem); - runp = collate->start; - while (runp != NULL) - { - if (runp->wcs != NULL) + runp = collate->start; + while (runp != NULL) { - size_t nr = runp->wcs[0] % act_size; - struct element_t *elemp = elem[nr]; - - while (elemp != NULL) + if (runp->wcs != NULL) { - if (elemp->wcs[0] == runp->wcs[0]) - break; - elemp = elemp->wcnext; - } + size_t nr = runp->wcs[0] % act_size; + struct element_t *elemp = elem[nr]; - if (elemp == NULL && ++cnt[nr] > act_planes) - { - act_planes = cnt[nr]; + while (elemp != NULL) + { + if (elemp->wcs[0] == runp->wcs[0]) + break; + elemp = elemp->wcnext; + } + + if (elemp == NULL && ++cnt[nr] > act_planes) + { + act_planes = cnt[nr]; - runp->wcnext = elem[nr]; - elem[nr] = runp; + runp->wcnext = elem[nr]; + elem[nr] = runp; - if ((act_size + PENALTY) * act_planes >= min_total) - break; + if ((act_size + PENALTY) * act_planes >= min_total) + break; + } } + + /* Up to the next entry. 
*/ + runp = runp->next; } - /* Up to the next entry. */ - runp = runp->next; - } + if ((act_size + PENALTY) * act_planes < min_total) + { + min_total = (act_size + PENALTY) * act_planes; + collate->plane_size = act_size; + collate->plane_cnt = act_planes; + } - if ((act_size + PENALTY) * act_planes < min_total) - { - min_total = (act_size + PENALTY) * act_planes; - collate->plane_size = act_size; - collate->plane_cnt = act_planes; + ++act_size; } + while (act_size < min_total); + + if (nr_wide_elems > 512 && !be_quiet) + fputs (_(" done\n"), stderr); + + /* Now that we know how large the table has to be we are able to + allocate the array and start adding the characters to the lists + in the same way we did it for the multibyte characters. */ + collate->wcheads = (struct element_t **) + obstack_alloc (&collate->mempool, (collate->plane_size + * collate->plane_cnt + * sizeof (struct element_t *))); + memset (collate->wcheads, '\0', (collate->plane_size + * collate->plane_cnt + * sizeof (struct element_t *))); - ++act_size; + collate->wcseqorder = (uint32_t *) + obstack_alloc (&collate->mempool, (collate->plane_size + * collate->plane_cnt + * sizeof (uint32_t))); + memset (collate->wcseqorder, '\0', (collate->plane_size + * collate->plane_cnt + * sizeof (uint32_t))); } - while (act_size < min_total); - - if (nr_wide_elems > 512 && !be_quiet) - fputs (_(" done\n"), stderr); + else + { + collate->plane_size = 0; + collate->plane_cnt = 0; - /* Now that we know how large the table has to be we are able to - allocate the array and start adding the characters to the lists - in the same way we did it for the multibyte characters. 
*/ - collate->wcheads = (struct element_t **) - obstack_alloc (&collate->mempool, (collate->plane_size - * collate->plane_cnt - * sizeof (struct element_t *))); - memset (collate->wcheads, '\0', (collate->plane_size - * collate->plane_cnt - * sizeof (struct element_t *))); + collate->wcheads_3level.p = 6; + collate->wcheads_3level.q = 10; + wchead_table_init (&collate->wcheads_3level); - collate->wcseqorder = (uint32_t *) - obstack_alloc (&collate->mempool, (collate->plane_size - * collate->plane_cnt - * sizeof (uint32_t))); - memset (collate->wcseqorder, '\0', (collate->plane_size - * collate->plane_cnt - * sizeof (uint32_t))); + collate->wcseqorder_3level.p = 6; + collate->wcseqorder_3level.q = 10; + collseq_table_init (&collate->wcseqorder_3level); + } /* Start adding. */ runp = collate->start; @@ -1745,26 +1771,42 @@ Computing table size for collation table might take a while..."), { if (runp->wcs != NULL) { + struct element_t *e; struct element_t **eptr; - struct element_t *lastp = NULL; + struct element_t *lastp; size_t idx; - /* Find a free index. */ - idx = runp->wcs[0] % collate->plane_size; - while (collate->wcheads[idx] != NULL) + if (oldstyle_tables) { - /* Stop if this is an entry with the same starting character. */ - if (collate->wcheads[idx]->wcs[0] == runp->wcs[0]) - break; + /* Find a free index. */ + idx = runp->wcs[0] % collate->plane_size; + while (collate->wcheads[idx] != NULL) + { + /* Stop if this is an entry with the same starting character. */ + if (collate->wcheads[idx]->wcs[0] == runp->wcs[0]) + break; - idx += collate->plane_size; + idx += collate->plane_size; + } + + /* Insert the collation sequence value. */ + collate->wcseqorder[idx] = runp->wcseqorder; + + /* Find the point where to insert in the list. */ + eptr = &collate->wcheads[idx]; } + else + { + /* Insert the collation sequence value. */ + collseq_table_add (&collate->wcseqorder_3level, runp->wcs[0], + runp->wcseqorder); - /* Insert the collation sequence value. 
*/ - collate->wcseqorder[idx] = runp->wcseqorder; + /* Find the point where to insert in the list. */ + e = wchead_table_get (&collate->wcheads_3level, runp->wcs[0]); + eptr = &e; + } - /* Find the point where to insert in the list. */ - eptr = &collate->wcheads[idx]; + lastp = NULL; while (*eptr != NULL) { if ((*eptr)->nwcs < runp->nwcs) @@ -1778,7 +1820,7 @@ Computing table size for collation table might take a while..."), if (c == 0) { /* This should not happen. It means that we have - to symbols with the same byte sequence. It is + two symbols with the same byte sequence. It is of course an error. */ error_at_line (0, 0, (*eptr)->file, (*eptr)->line, _("symbol `%s' has the same encoding as"), @@ -1803,6 +1845,8 @@ Computing table size for collation table might take a while..."), if (*eptr != NULL) (*eptr)->wclast = runp; *eptr = runp; + if (!oldstyle_tables && eptr == &e) + wchead_table_add (&collate->wcheads_3level, runp->wcs[0], e); dont_insertwc: } @@ -1810,6 +1854,9 @@ Computing table size for collation table might take a while..."), runp = runp->next; } + if (!oldstyle_tables) + collseq_table_finalize (&collate->wcseqorder_3level); + /* Now determine whether the UNDEFINED entry is needed and if yes, whether it was defined. */ collate->undefined.used_in_level = need_undefined ? ~0ul : 0; @@ -1968,9 +2015,10 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, struct obstack extrapool; struct obstack indirectpool; struct section_list *sect; + size_t table_size; uint32_t *names; uint32_t *tablewc; - size_t table_size; + struct collidx_table tablewc_3level; uint32_t elem_size; uint32_t *elem_table; int i; @@ -2321,15 +2369,23 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, assert (idx[cnt] % 4 == 0); ++cnt; - /* Construct a table with the names. The size of the table is the same - as the table with the pointers. 
*/ - table_size = collate->plane_size * collate->plane_cnt; - names = (uint32_t *) alloca (table_size * sizeof (uint32_t)); - for (ch = 0; ch < table_size; ++ch) - if (collate->wcheads[ch] == NULL) - names[ch] = 0; - else - names[ch] = collate->wcheads[ch]->wcs[0]; + if (oldstyle_tables) + { + /* Construct a table with the names. The size of the table is the same + as the table with the pointers. */ + table_size = collate->plane_size * collate->plane_cnt; + names = (uint32_t *) alloca (table_size * sizeof (uint32_t)); + for (ch = 0; ch < table_size; ++ch) + if (collate->wcheads[ch] == NULL) + names[ch] = 0; + else + names[ch] = collate->wcheads[ch]->wcs[0]; + } + else + { + table_size = 0; + names = NULL; + } assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NAMES)); iov[2 + cnt].iov_base = names; @@ -2363,95 +2419,111 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, with the same wide character and add them one after the other to the table. In case we have more than one sequence starting with the same byte we have to use extra indirection. */ - tablewc = (uint32_t *) alloca (table_size * sizeof (uint32_t)); - for (ch = 0; ch < table_size; ++ch) - if (collate->wcheads[ch] == NULL) - { - /* Set the entry to zero. */ - tablewc[ch] = 0; - } - else if (collate->wcheads[ch]->wcnext == NULL - && collate->wcheads[ch]->nwcs == 1) - { - tablewc[ch] = output_weightwc (&weightpool, collate, - collate->wcheads[ch]); - } - else + { + void add_to_tablewc (uint32_t ch, struct element_t *runp) { - /* As for the singlebyte table, we recognize sequences and - compress them. */ - struct element_t *runp = collate->wcheads[ch]; - struct element_t *lastp; - - tablewc[ch] = -(obstack_object_size (&extrapool) / sizeof (uint32_t)); - - do + if (runp->wcnext == NULL && runp->nwcs == 1) { - /* Store the current index in the weight table. We know that - the current position in the `extrapool' is aligned on a - 32-bit address. 
*/ - int32_t weightidx; - int added; - - /* Find out wether this is a single entry or we have more than - one consecutive entry. */ - if (runp->wcnext != NULL - && runp->nwcs == runp->wcnext->nwcs - && wmemcmp ((wchar_t *) runp->wcs, - (wchar_t *)runp->wcnext->wcs, runp->nwcs - 1) == 0 - && (runp->wcs[runp->nwcs - 1] - == runp->wcnext->wcs[runp->nwcs - 1] + 1)) - { - int i; - struct element_t *series_startp = runp; - struct element_t *curp; + int32_t weigthidx = output_weightwc (&weightpool, collate, runp); + if (oldstyle_tables) + tablewc[ch] = weigthidx; + else + collidx_table_add (&tablewc_3level, ch, weigthidx); + } + else + { + /* As for the singlebyte table, we recognize sequences and + compress them. */ + struct element_t *lastp; - /* Now add first the initial byte sequence. */ - added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t); - if (sizeof (int32_t) == sizeof (int)) - obstack_make_room (&extrapool, added); + if (oldstyle_tables) + tablewc[ch] = -(obstack_object_size (&extrapool) / sizeof (uint32_t)); + else + collidx_table_add (&tablewc_3level, ch, + -(obstack_object_size (&extrapool) / sizeof (uint32_t))); - /* More than one consecutive entry. We mark this by having - a negative index into the indirect table. */ - if (sizeof (int32_t) == sizeof (int)) - { - obstack_int_grow_fast (&extrapool, - -(obstack_object_size (&indirectpool) - / sizeof (int32_t))); - obstack_int_grow_fast (&extrapool, runp->nwcs - 1); - } - else + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Find out whether this is a single entry or we have more than + one consecutive entry. 
*/ + if (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)) { - int32_t i = -(obstack_object_size (&indirectpool) - / sizeof (int32_t)); - obstack_grow (&extrapool, &i, sizeof (int32_t)); - i = runp->nwcs - 1; - obstack_grow (&extrapool, &i, sizeof (int32_t)); - } + int i; + struct element_t *series_startp = runp; + struct element_t *curp; - do - runp = runp->wcnext; - while (runp->wcnext != NULL - && runp->nwcs == runp->wcnext->nwcs - && wmemcmp ((wchar_t *) runp->wcs, - (wchar_t *)runp->wcnext->wcs, - runp->nwcs - 1) == 0 - && (runp->wcs[runp->nwcs - 1] - == runp->wcnext->wcs[runp->nwcs - 1] + 1)); + /* Now add first the initial byte sequence. */ + added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t); + if (sizeof (int32_t) == sizeof (int)) + obstack_make_room (&extrapool, added); - /* Now walk backward from here to the beginning. */ - curp = runp; + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. 
*/ + if (sizeof (int32_t) == sizeof (int)) + { + obstack_int_grow_fast (&extrapool, + -(obstack_object_size (&indirectpool) + / sizeof (int32_t))); + obstack_int_grow_fast (&extrapool, runp->nwcs - 1); + } + else + { + int32_t i = -(obstack_object_size (&indirectpool) + / sizeof (int32_t)); + obstack_grow (&extrapool, &i, sizeof (int32_t)); + i = runp->nwcs - 1; + obstack_grow (&extrapool, &i, sizeof (int32_t)); + } - for (i = 1; i < runp->nwcs; ++i) - if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow_fast (&extrapool, curp->wcs[i]); - else - obstack_grow (&extrapool, &curp->wcs[i], sizeof (int32_t)); + do + runp = runp->wcnext; + while (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)); + + /* Now walk backward from here to the beginning. */ + curp = runp; + + for (i = 1; i < runp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, curp->wcs[i]); + else + obstack_grow (&extrapool, &curp->wcs[i], + sizeof (int32_t)); - /* Now find the end of the consecutive sequence and - add all the indeces in the indirect pool. */ - do - { + /* Now find the end of the consecutive sequence and + add all the indices in the indirect pool. */ + do + { + weightidx = output_weightwc (&weightpool, collate, + curp); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&indirectpool, weightidx); + else + obstack_grow (&indirectpool, &weightidx, + sizeof (int32_t)); + + curp = curp->wclast; + } + while (curp != series_startp); + + /* Add the final weight. 
*/ weightidx = output_weightwc (&weightpool, collate, curp); if (sizeof (int32_t) == sizeof (int)) obstack_int_grow (&indirectpool, weightidx); @@ -2459,68 +2531,88 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, obstack_grow (&indirectpool, &weightidx, sizeof (int32_t)); - curp = curp->wclast; + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < curp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&extrapool, curp->wcs[i]); + else + obstack_grow (&extrapool, &curp->wcs[i], + sizeof (int32_t)); } - while (curp != series_startp); - - /* Add the final weight. */ - weightidx = output_weightwc (&weightpool, collate, curp); - if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow (&indirectpool, weightidx); else - obstack_grow (&indirectpool, &weightidx, sizeof (int32_t)); - - /* And add the end byte sequence. Without length this - time. */ - for (i = 1; i < curp->nwcs; ++i) - if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow (&extrapool, curp->wcs[i]); - else - obstack_grow (&extrapool, &curp->wcs[i], sizeof (int32_t)); - } - else - { - /* A single entry. Simply add the index and the length and - string (except for the first character which is already - tested for). */ - int i; + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). */ + int i; - /* Output the weight info. */ - weightidx = output_weightwc (&weightpool, collate, runp); + /* Output the weight info. 
*/ + weightidx = output_weightwc (&weightpool, collate, runp); - added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t); - if (sizeof (int) == sizeof (int32_t)) - obstack_make_room (&extrapool, added); + added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t); + if (sizeof (int) == sizeof (int32_t)) + obstack_make_room (&extrapool, added); - if (sizeof (int32_t) == sizeof (int)) - { - obstack_int_grow_fast (&extrapool, weightidx); - obstack_int_grow_fast (&extrapool, runp->nwcs - 1); - } - else - { - int32_t l = runp->nwcs - 1; - obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); - obstack_grow (&extrapool, &l, sizeof (int32_t)); + if (sizeof (int32_t) == sizeof (int)) + { + obstack_int_grow_fast (&extrapool, weightidx); + obstack_int_grow_fast (&extrapool, runp->nwcs - 1); + } + else + { + int32_t l = runp->nwcs - 1; + obstack_grow (&extrapool, &weightidx, + sizeof (int32_t)); + obstack_grow (&extrapool, &l, sizeof (int32_t)); + } + for (i = 1; i < runp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, runp->wcs[i]); + else + obstack_grow (&extrapool, &runp->wcs[i], + sizeof (int32_t)); } - for (i = 1; i < runp->nwcs; ++i) - if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow_fast (&extrapool, runp->wcs[i]); - else - obstack_grow (&extrapool, &runp->wcs[i], sizeof (int32_t)); - } - /* Next entry. */ - lastp = runp; - runp = runp->wcnext; + /* Next entry. */ + lastp = runp; + runp = runp->wcnext; + } + while (runp != NULL); } - while (runp != NULL); } + if (oldstyle_tables) + { + tablewc = (uint32_t *) alloca (table_size * sizeof (uint32_t)); + + for (ch = 0; ch < table_size; ++ch) + if (collate->wcheads[ch] == NULL) + /* Set the entry to zero. 
*/ + tablewc[ch] = 0; + else + add_to_tablewc (ch, collate->wcheads[ch]); + } + else + { + tablewc_3level.p = 6; + tablewc_3level.q = 10; + collidx_table_init (&tablewc_3level); + + wchead_table_iterate (&collate->wcheads_3level, add_to_tablewc); + + collidx_table_finalize (&tablewc_3level); + } + } + /* Now add the four tables. */ assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC)); - iov[2 + cnt].iov_base = tablewc; - iov[2 + cnt].iov_len = table_size * sizeof (uint32_t); + iov[2 + cnt].iov_base = (oldstyle_tables + ? (void *) tablewc + : (void *) tablewc_3level.result); + iov[2 + cnt].iov_len = (oldstyle_tables + ? table_size * sizeof (uint32_t) + : tablewc_3level.result_size); idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0); assert (idx[cnt] % 4 == 0); @@ -2672,8 +2764,12 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, ++cnt; assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC)); - iov[2 + cnt].iov_base = collate->wcseqorder; - iov[2 + cnt].iov_len = table_size * sizeof (uint32_t); + iov[2 + cnt].iov_base = (oldstyle_tables + ? (void *) collate->wcseqorder + : (void *) collate->wcseqorder_3level.result); + iov[2 + cnt].iov_len = (oldstyle_tables + ? table_size * sizeof (uint32_t) + : collate->wcseqorder_3level.result_size); assert (idx[cnt] % 4 == 0); ++cnt; diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index e2d76b0..1f40fe8 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -112,6 +112,9 @@ struct locale_ctype_t uint32_t *charnames; size_t charnames_max; size_t charnames_act; + /* An index lookup table, to speedup find_idx. 
*/ +#define MAX_CHARNAMES_IDX 0x10000 + uint32_t *charnames_idx; struct repertoire_t *repertoire; @@ -253,6 +256,10 @@ ctype_startup (struct linereader *lr, struct localedef_t *locale, for (cnt = 0; cnt < 256; ++cnt) ctype->charnames[cnt] = cnt; ctype->charnames_act = 256; + ctype->charnames_idx = + (uint32_t *) xmalloc (MAX_CHARNAMES_IDX * sizeof (uint32_t)); + for (cnt = 0; cnt < MAX_CHARNAMES_IDX; ++cnt) + ctype->charnames_idx[cnt] = ~((uint32_t) 0); /* Fill character class information. */ ctype->last_class_char = ILLEGAL_CHAR_VALUE; @@ -1299,9 +1306,23 @@ find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, if (idx < 256) return table == NULL ? NULL : &(*table)[idx]; - for (cnt = 256; cnt < ctype->charnames_act; ++cnt) - if (ctype->charnames[cnt] == idx) - break; + /* If idx is in the usual range, use the charnames_idx lookup table + instead of the slow search loop. */ + if (idx < MAX_CHARNAMES_IDX) + { + if (ctype->charnames_idx[idx] != ~((uint32_t) 0)) + /* Found. */ + cnt = ctype->charnames_idx[idx]; + else + /* Not found. */ + cnt = ctype->charnames_act; + } + else + { + for (cnt = 256; cnt < ctype->charnames_act; ++cnt) + if (ctype->charnames[cnt] == idx) + break; + } /* We have to distinguish two cases: the name is found or not. */ if (cnt == ctype->charnames_act) @@ -1315,6 +1336,8 @@ find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, sizeof (uint32_t) * ctype->charnames_max); } ctype->charnames[ctype->charnames_act++] = idx; + if (idx < MAX_CHARNAMES_IDX) + ctype->charnames_idx[idx] = cnt; } if (table == NULL) @@ -3582,473 +3605,23 @@ wctype_table_finalize (struct wctype_table *t) free (t->level3); } -struct wcwidth_table -{ - /* Parameters. */ - unsigned int p; - unsigned int q; - /* Working representation. 
*/ - size_t level1_alloc; - size_t level1_size; - uint32_t *level1; - size_t level2_alloc; - size_t level2_size; - uint32_t *level2; - size_t level3_alloc; - size_t level3_size; - uint8_t *level3; - /* Compressed representation. */ - size_t result_size; - char *result; -}; - -/* Initialize. Assumes t->p and t->q have already been set. */ +#define TABLE wcwidth_table +#define ELEMENT uint8_t +#define DEFAULT 0xff +#include "3level.h" + +#define TABLE wctrans_table +#define ELEMENT int32_t +#define DEFAULT 0 +#define wctrans_table_add wctrans_table_add_internal +#include "3level.h" +#undef wctrans_table_add +/* The wctrans_table must actually store the difference between the + desired result and the argument. */ static inline void -wcwidth_table_init (struct wcwidth_table *t) -{ - t->level1_alloc = t->level1_size = 0; - t->level2_alloc = t->level2_size = 0; - t->level3_alloc = t->level3_size = 0; -} - -/* Retrieve an entry. */ -static inline uint8_t -wcwidth_table_get (struct wcwidth_table *t, uint32_t wc) -{ - uint32_t index1 = wc >> (t->q + t->p); - if (index1 < t->level1_size) - { - uint32_t lookup1 = t->level1[index1]; - if (lookup1 != ~((uint32_t) 0)) - { - uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1)) - + (lookup1 << t->q); - uint32_t lookup2 = t->level2[index2]; - if (lookup2 != ~((uint32_t) 0)) - { - uint32_t index3 = (wc & ((1 << t->p) - 1)) - + (lookup2 << t->p); - uint8_t lookup3 = t->level3[index3]; - - return lookup3; - } - } - } - return 0xff; -} - -/* Add one entry. 
*/ -static void -wcwidth_table_add (struct wcwidth_table *t, uint32_t wc, uint8_t width) -{ - uint32_t index1 = wc >> (t->q + t->p); - uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1); - uint32_t index3 = wc & ((1 << t->p) - 1); - size_t i, i1, i2; - - if (width == wcwidth_table_get (t, wc)) - return; - - if (index1 >= t->level1_size) - { - if (index1 >= t->level1_alloc) - { - size_t alloc = 2 * t->level1_alloc; - if (alloc <= index1) - alloc = index1 + 1; - t->level1 = (t->level1_alloc > 0 - ? (uint32_t *) xrealloc ((char *) t->level1, - alloc * sizeof (uint32_t)) - : (uint32_t *) xmalloc (alloc * sizeof (uint32_t))); - t->level1_alloc = alloc; - } - while (index1 >= t->level1_size) - t->level1[t->level1_size++] = ~((uint32_t) 0); - } - - if (t->level1[index1] == ~((uint32_t) 0)) - { - if (t->level2_size == t->level2_alloc) - { - size_t alloc = 2 * t->level2_alloc + 1; - t->level2 = (t->level2_alloc > 0 - ? (uint32_t *) xrealloc ((char *) t->level2, - (alloc << t->q) * sizeof (uint32_t)) - : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t))); - t->level2_alloc = alloc; - } - i1 = t->level2_size << t->q; - i2 = (t->level2_size + 1) << t->q; - for (i = i1; i < i2; i++) - t->level2[i] = ~((uint32_t) 0); - t->level1[index1] = t->level2_size++; - } - - index2 += t->level1[index1] << t->q; - - if (t->level2[index2] == ~((uint32_t) 0)) - { - if (t->level3_size == t->level3_alloc) - { - size_t alloc = 2 * t->level3_alloc + 1; - t->level3 = (t->level3_alloc > 0 - ? (uint8_t *) xrealloc ((char *) t->level3, - (alloc << t->p) * sizeof (uint8_t)) - : (uint8_t *) xmalloc ((alloc << t->p) * sizeof (uint8_t))); - t->level3_alloc = alloc; - } - i1 = t->level3_size << t->p; - i2 = (t->level3_size + 1) << t->p; - for (i = i1; i < i2; i++) - t->level3[i] = 0xff; - t->level2[index2] = t->level3_size++; - } - - index3 += t->level2[index2] << t->p; - - t->level3[index3] = width; -} - -/* Finalize and shrink. 
*/ -static void -wcwidth_table_finalize (struct wcwidth_table *t) -{ - size_t i, j, k; - uint32_t reorder3[t->level3_size]; - uint32_t reorder2[t->level2_size]; - uint32_t level1_offset, level2_offset, level3_offset, last_offset; - - /* Uniquify level3 blocks. */ - k = 0; - for (j = 0; j < t->level3_size; j++) - { - for (i = 0; i < k; i++) - if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], - (1 << t->p) * sizeof (uint8_t)) == 0) - break; - /* Relocate block j to block i. */ - reorder3[j] = i; - if (i == k) - { - if (i != j) - memcpy (&t->level3[i << t->p], &t->level3[j << t->p], - (1 << t->p) * sizeof (uint8_t)); - k++; - } - } - t->level3_size = k; - - for (i = 0; i < (t->level2_size << t->q); i++) - if (t->level2[i] != ~((uint32_t) 0)) - t->level2[i] = reorder3[t->level2[i]]; - - /* Uniquify level2 blocks. */ - k = 0; - for (j = 0; j < t->level2_size; j++) - { - for (i = 0; i < k; i++) - if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], - (1 << t->q) * sizeof (uint32_t)) == 0) - break; - /* Relocate block j to block i. */ - reorder2[j] = i; - if (i == k) - { - if (i != j) - memcpy (&t->level2[i << t->q], &t->level2[j << t->q], - (1 << t->q) * sizeof (uint32_t)); - k++; - } - } - t->level2_size = k; - - for (i = 0; i < t->level1_size; i++) - if (t->level1[i] != ~((uint32_t) 0)) - t->level1[i] = reorder2[t->level1[i]]; - - /* Create and fill the resulting compressed representation. 
*/ - last_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t) - + (t->level2_size << t->q) * sizeof (uint32_t) - + (t->level3_size << t->p) * sizeof (uint8_t); - t->result_size = (last_offset + 3) & ~3ul; - t->result = (char *) xmalloc (t->result_size); - - level1_offset = - 5 * sizeof (uint32_t); - level2_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t); - level3_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t) - + (t->level2_size << t->q) * sizeof (uint32_t); - - ((uint32_t *) t->result)[0] = t->q + t->p; - ((uint32_t *) t->result)[1] = t->level1_size; - ((uint32_t *) t->result)[2] = t->p; - ((uint32_t *) t->result)[3] = (1 << t->q) - 1; - ((uint32_t *) t->result)[4] = (1 << t->p) - 1; - - for (i = 0; i < t->level1_size; i++) - ((uint32_t *) (t->result + level1_offset))[i] = - (t->level1[i] == ~((uint32_t) 0) - ? 0 - : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); - - for (i = 0; i < (t->level2_size << t->q); i++) - ((uint32_t *) (t->result + level2_offset))[i] = - (t->level2[i] == ~((uint32_t) 0) - ? 0 - : (t->level2[i] << t->p) * sizeof (uint8_t) + level3_offset); - - for (i = 0; i < (t->level3_size << t->p); i++) - ((uint8_t *) (t->result + level3_offset))[i] = t->level3[i]; - - if (last_offset < t->result_size) - memset (t->result + last_offset, 0, t->result_size - last_offset); - - if (t->level1_alloc > 0) - free (t->level1); - if (t->level2_alloc > 0) - free (t->level2); - if (t->level3_alloc > 0) - free (t->level3); -} - -struct wctrans_table -{ - /* Parameters. */ - unsigned int p; - unsigned int q; - /* Working representation. */ - size_t level1_alloc; - size_t level1_size; - uint32_t *level1; - size_t level2_alloc; - size_t level2_size; - uint32_t *level2; - size_t level3_alloc; - size_t level3_size; - int32_t *level3; - /* Compressed representation. */ - size_t result_size; - char *result; -}; - -/* Initialize. Assumes t->p and t->q have already been set. 
*/ -static inline void -wctrans_table_init (struct wctrans_table *t) -{ - t->level1_alloc = t->level1_size = 0; - t->level2_alloc = t->level2_size = 0; - t->level3_alloc = t->level3_size = 0; -} - -/* Retrieve an entry. */ -static inline uint32_t -wctrans_table_get (struct wctrans_table *t, uint32_t wc) -{ - uint32_t index1 = wc >> (t->q + t->p); - if (index1 < t->level1_size) - { - uint32_t lookup1 = t->level1[index1]; - if (lookup1 != ~((uint32_t) 0)) - { - uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1)) - + (lookup1 << t->q); - uint32_t lookup2 = t->level2[index2]; - if (lookup2 != ~((uint32_t) 0)) - { - uint32_t index3 = (wc & ((1 << t->p) - 1)) - + (lookup2 << t->p); - int32_t lookup3 = t->level3[index3]; - - return wc + lookup3; - } - } - } - return wc; -} - -/* Add one entry. */ -static void wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc) { - uint32_t index1 = wc >> (t->q + t->p); - uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1); - uint32_t index3 = wc & ((1 << t->p) - 1); - int32_t value; - size_t i, i1, i2; - - if (mapped_wc == wctrans_table_get (t, wc)) - return; - - value = (int32_t) mapped_wc - (int32_t) wc; - - if (index1 >= t->level1_size) - { - if (index1 >= t->level1_alloc) - { - size_t alloc = 2 * t->level1_alloc; - if (alloc <= index1) - alloc = index1 + 1; - t->level1 = (t->level1_alloc > 0 - ? (uint32_t *) xrealloc ((char *) t->level1, - alloc * sizeof (uint32_t)) - : (uint32_t *) xmalloc (alloc * sizeof (uint32_t))); - t->level1_alloc = alloc; - } - while (index1 >= t->level1_size) - t->level1[t->level1_size++] = ~((uint32_t) 0); - } - - if (t->level1[index1] == ~((uint32_t) 0)) - { - if (t->level2_size == t->level2_alloc) - { - size_t alloc = 2 * t->level2_alloc + 1; - t->level2 = (t->level2_alloc > 0 - ? 
(uint32_t *) xrealloc ((char *) t->level2, - (alloc << t->q) * sizeof (uint32_t)) - : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t))); - t->level2_alloc = alloc; - } - i1 = t->level2_size << t->q; - i2 = (t->level2_size + 1) << t->q; - for (i = i1; i < i2; i++) - t->level2[i] = ~((uint32_t) 0); - t->level1[index1] = t->level2_size++; - } - - index2 += t->level1[index1] << t->q; - - if (t->level2[index2] == ~((uint32_t) 0)) - { - if (t->level3_size == t->level3_alloc) - { - size_t alloc = 2 * t->level3_alloc + 1; - t->level3 = (t->level3_alloc > 0 - ? (int32_t *) xrealloc ((char *) t->level3, - (alloc << t->p) * sizeof (int32_t)) - : (int32_t *) xmalloc ((alloc << t->p) * sizeof (int32_t))); - t->level3_alloc = alloc; - } - i1 = t->level3_size << t->p; - i2 = (t->level3_size + 1) << t->p; - for (i = i1; i < i2; i++) - t->level3[i] = 0; - t->level2[index2] = t->level3_size++; - } - - index3 += t->level2[index2] << t->p; - - t->level3[index3] = value; -} - -/* Finalize and shrink. */ -static void -wctrans_table_finalize (struct wctrans_table *t) -{ - size_t i, j, k; - uint32_t reorder3[t->level3_size]; - uint32_t reorder2[t->level2_size]; - uint32_t level1_offset, level2_offset, level3_offset; - - /* Uniquify level3 blocks. */ - k = 0; - for (j = 0; j < t->level3_size; j++) - { - for (i = 0; i < k; i++) - if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], - (1 << t->p) * sizeof (int32_t)) == 0) - break; - /* Relocate block j to block i. */ - reorder3[j] = i; - if (i == k) - { - if (i != j) - memcpy (&t->level3[i << t->p], &t->level3[j << t->p], - (1 << t->p) * sizeof (int32_t)); - k++; - } - } - t->level3_size = k; - - for (i = 0; i < (t->level2_size << t->q); i++) - if (t->level2[i] != ~((uint32_t) 0)) - t->level2[i] = reorder3[t->level2[i]]; - - /* Uniquify level2 blocks. 
*/ - k = 0; - for (j = 0; j < t->level2_size; j++) - { - for (i = 0; i < k; i++) - if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], - (1 << t->q) * sizeof (uint32_t)) == 0) - break; - /* Relocate block j to block i. */ - reorder2[j] = i; - if (i == k) - { - if (i != j) - memcpy (&t->level2[i << t->q], &t->level2[j << t->q], - (1 << t->q) * sizeof (uint32_t)); - k++; - } - } - t->level2_size = k; - - for (i = 0; i < t->level1_size; i++) - if (t->level1[i] != ~((uint32_t) 0)) - t->level1[i] = reorder2[t->level1[i]]; - - /* Create and fill the resulting compressed representation. */ - t->result_size = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t) - + (t->level2_size << t->q) * sizeof (uint32_t) - + (t->level3_size << t->p) * sizeof (int32_t); - t->result = (char *) xmalloc (t->result_size); - - level1_offset = - 5 * sizeof (uint32_t); - level2_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t); - level3_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t) - + (t->level2_size << t->q) * sizeof (uint32_t); - - ((uint32_t *) t->result)[0] = t->q + t->p; - ((uint32_t *) t->result)[1] = t->level1_size; - ((uint32_t *) t->result)[2] = t->p; - ((uint32_t *) t->result)[3] = (1 << t->q) - 1; - ((uint32_t *) t->result)[4] = (1 << t->p) - 1; - - for (i = 0; i < t->level1_size; i++) - ((uint32_t *) (t->result + level1_offset))[i] = - (t->level1[i] == ~((uint32_t) 0) - ? 0 - : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); - - for (i = 0; i < (t->level2_size << t->q); i++) - ((uint32_t *) (t->result + level2_offset))[i] = - (t->level2[i] == ~((uint32_t) 0) - ? 
0 - : (t->level2[i] << t->p) * sizeof (int32_t) + level3_offset); - - for (i = 0; i < (t->level3_size << t->p); i++) - ((int32_t *) (t->result + level3_offset))[i] = t->level3[i]; - - if (t->level1_alloc > 0) - free (t->level1); - if (t->level2_alloc > 0) - free (t->level2); - if (t->level3_alloc > 0) - free (t->level3); + wctrans_table_add_internal (t, wc, mapped_wc - wc); } diff --git a/locale/weightwc.h b/locale/weightwc.h index d0ca018..92bf47a 100644 --- a/locale/weightwc.h +++ b/locale/weightwc.h @@ -24,19 +24,29 @@ findidx (const wint_t **cpp) int_fast32_t i; const wint_t *cp; wint_t ch; - size_t idx; size_t cnt = 0; ch = *(*cpp)++; - idx = ch % size; - while (names[idx] != ch) + if (size != 0) { - if (++cnt == layers) - /* We didn't find the name. It is case for UNDEFINED. */ - return 0; - idx += size; + /* Old locale format. */ + size_t idx; + + idx = ch % size; + while (names[idx] != ch) + { + if (++cnt == layers) + /* We didn't find the name. It is case for UNDEFINED. */ + return 0; + idx += size; + } + i = table[idx]; + } + else + { + /* New locale format. */ + i = collidx_table_lookup ((const char *) table, ch); } - i = table[idx]; if (i >= 0) /* This is an index into the weight table. Cool. */ -- cgit v1.1