diff options
author | Ulrich Drepper <drepper@redhat.com> | 1999-08-31 07:04:41 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 1999-08-31 07:04:41 +0000 |
commit | 4b10dd6c1959577f57850ca427a94fe22b9f3299 (patch) | |
tree | b385d9b27e5a40d5baf7cd7e27c7cc5ef7129b5b /locale/programs | |
parent | 1d1740d6b12894ed6a430e2e98bf73c5243b2925 (diff) | |
download | glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.zip glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.tar.gz glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.tar.bz2 |
Update.
* locale/Makefile (distribute): Add iso-639.def and iso-3166.def.
Change charset.h to charmap.h.
(categories): Add new categories. Leave out collate for now.
Update build rules.
* locale/categories.def: Add definitions for new categories.
* locale/langinfo.h: Likewise.
* locale/locale.h: Likewise.
* locale/C-address.c: New file.
* locale/C-identification.c: New file.
* locale/C-measurement.c: New file.
* locale/C-name.c: New file.
* locale/C-paper.c: New file.
* locale/C-telephone.c: New file.
* locale/lc-address.c: Likewise.
* locale/lc-identification.c: Likewise.
* locale/lc-measurement.c: Likewise.
* locale/lc-name.c: Likewise.
* locale/lc-paper.c: Likewise.
* locale/lc-telephone.c: Likewise.
* locale/C-ctype.c: Update for locale rewrite.
* locale/C-messages.c: Likewise.
* locale/C-monetary.c: Likewise.
* locale/C-time.c: Likewise.
* locale/lc-collate.c: Likewise.
* locale/lc-ctype.c: Likewise.
* locale/lc-monetary.c: Likewise.
* locale/lc-time.c: Likewise.
* locale/localeinfo.h: Likewise.
* locale/newlocale.c: Likewise.
* locale/setlocale.c: Likewise.
* locale/weight.h: Likewise.
* locale/findlocale.c: Unconditionally use mmap.
Handle new categories.
* locale/loadlocale.c: Likewise.
* locale/iso-3166.def: New file.
* locale/iso-639.def: New file.
* locale/programs/charmap-kw.gperf: Add new keywords.
* locale/programs/locfile-kw.gperf: Likewise.
* locale/programs/locfile-token.h: Define new tokens.
* locale/programs/charmap.c: Rewrite to handle multibyte charsets.
* locale/programs/charmap.h: New file.
* locale/programs/charset.h: Removed.
* locale/programs/config.h: Add __LC_LAST.
* locale/programs/ld-address.c: New file.
* locale/programs/ld-identification.c: New file.
* locale/programs/ld-measurement.c: New file.
* locale/programs/ld-name.c: New file.
* locale/programs/ld-paper.c: New file.
* locale/programs/ld-telephone.c: New file.
* locale/programs/ld-collate.c: Update for locale rewrite.
* locale/programs/ld-ctype.c: Likewise.
* locale/programs/ld-messages.c: Likewise.
* locale/programs/ld-monetary.c: Likewise.
* locale/programs/ld-numeric.c: Likewise.
* locale/programs/ld-time.c: Likewise.
* locale/programs/locale.c: Likewise.
* locale/programs/localedef.c: Likewise.
* locale/programs/locfile.c: Likewise.
* locale/programs/repertoire.c: Likewise.
* locale/programs/repertoire.h: Likewise.
* locale/programs/locfile.c: Update prototypes.
Update handle_copy definition.
* locale/programs/linereader.c: Add handling of wide char strings and
new definition file syntax.
* locale/programs/linereader.h (struct token): Add elements for wide
character strings.
* locale/programs/locale-spec.c: Disable handling of collation
elements for now.
* locale/programs/simple-hash.h: Cleanup.
* locale/programs/stringtrans.h: Handle quoting of end of line.
* string/strcoll.c: Fall back on strcmp for now.
* string/strxfrm.c: Fall back on strncpy/strlen for now.
* time/strftime.c: Use new wide character data for wcsftime.
* time/strptime.c: Remove _nl_C_LC_TIME declaration.
* wctype/cname-lookup.h: Update for new LC_CTYPE data.
Diffstat (limited to 'locale/programs')
32 files changed, 9880 insertions, 3236 deletions
diff --git a/locale/programs/charmap-kw.gperf b/locale/programs/charmap-kw.gperf index 1fb9c38..3424104 100644 --- a/locale/programs/charmap-kw.gperf +++ b/locale/programs/charmap-kw.gperf @@ -1,7 +1,7 @@ %{ -/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + Contributed by Ulrich Drepper, <drepper@gnu.org>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -33,6 +33,9 @@ g0esc, tok_g0esc, 1 g1esc, tok_g1esc, 1 g2esc, tok_g2esc, 1 g3esc, tok_g3esc, 1 +escseq, tok_escseq, 1 +addset, tok_addset, 1 +include, tok_include, 1 CHARMAP, tok_charmap, 0 END, tok_end, 0 WIDTH, tok_width, 0 diff --git a/locale/programs/charmap-kw.h b/locale/programs/charmap-kw.h index 3bfcd14..4b40216 100644 --- a/locale/programs/charmap-kw.h +++ b/locale/programs/charmap-kw.h @@ -1,8 +1,8 @@ -/* C code produced by gperf version 2.5 (GNU C++ version) */ -/* Command-line: gperf -acCgopt -k1,2,5,$ -N charmap_hash programs/charmap-kw.gperf */ -/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +/* ANSI-C code produced by gperf version 2.7.1 (19981006 egcs) */ +/* Command-line: gperf -acCgopt -k1,2,5,9,$ -L ANSI-C -N charmap_hash programs/charmap-kw.gperf */ +/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + Contributed by Ulrich Drepper, <drepper@gnu.org>. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -24,81 +24,103 @@ #include "locfile-token.h" struct keyword_t ; -#define TOTAL_KEYWORDS 14 +#define TOTAL_KEYWORDS 17 #define MIN_WORD_LENGTH 3 #define MAX_WORD_LENGTH 14 #define MIN_HASH_VALUE 3 -#define MAX_HASH_VALUE 25 -/* maximum key range = 23, duplicates = 0 */ +#define MAX_HASH_VALUE 35 +/* maximum key range = 33, duplicates = 0 */ #ifdef __GNUC__ -inline +__inline #endif static unsigned int -hash (register const char *str, register int len) +hash (register const char *str, register unsigned int len) { static const unsigned char asso_values[] = { - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 14, 10, - 15, 4, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 0, 0, 0, - 26, 26, 0, 0, 26, 26, 26, 0, 0, 26, - 0, 26, 26, 26, 5, 26, 26, 0, 26, 26, - 26, 26, 26, 26, 26, 0, 26, 26, 0, 0, - 26, 0, 26, 0, 26, 26, 26, 26, 26, 0, - 15, 0, 0, 26, 0, 0, 26, 0, 26, 26, - 0, 26, 26, 26, 26, 26, 26, 26, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 25, 10, + 15, 20, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 10, 0, 0, + 5, 36, 0, 0, 36, 36, 36, 0, 0, 36, + 0, 36, 0, 36, 0, 36, 36, 0, 36, 36, + 36, 36, 36, 36, 36, 0, 36, 0, 0, 0, + 10, 0, 36, 0, 0, 0, 36, 36, 36, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 36, 36, + 25, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 
36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36 }; register int hval = len; switch (hval) { default: + case 9: + hval += asso_values[(unsigned char)str[8]]; + case 8: + case 7: + case 6: case 5: - hval += asso_values[str[4]]; + hval += asso_values[(unsigned char)str[4]]; case 4: case 3: case 2: - hval += asso_values[str[1]]; + hval += asso_values[(unsigned char)str[1]]; case 1: - hval += asso_values[str[0]]; + hval += asso_values[(unsigned char)str[0]]; break; } - return hval + asso_values[str[len - 1]]; + return hval + asso_values[(unsigned char)str[len - 1]]; } #ifdef __GNUC__ -inline +__inline #endif const struct keyword_t * -charmap_hash (register const char *str, register int len) +charmap_hash (register const char *str, register unsigned int len) { static const struct keyword_t wordlist[] = { - {"",}, {"",}, {"",}, - {"END", tok_end, 0}, - {"",}, - {"WIDTH", tok_width, 0}, - {"",}, - {"CHARMAP", tok_charmap, 0}, - {"",}, - {"g3esc", tok_g3esc, 1}, - {"mb_cur_max", tok_mb_cur_max, 1}, - {"escape_char", tok_escape_char, 1}, - {"comment_char", tok_comment_char, 1}, - {"code_set_name", tok_code_set_name, 1}, - {"WIDTH_VARIABLE", tok_width_variable, 0}, - {"g1esc", tok_g1esc, 1}, - {"",}, {"",}, - {"WIDTH_DEFAULT", tok_width_default, 0}, - {"g0esc", tok_g0esc, 1}, - {"g2esc", tok_g2esc, 1}, - {"",}, {"",}, {"",}, {"",}, - {"mb_cur_min", tok_mb_cur_min, 1}, + {""}, {""}, {""}, + {"END", tok_end, 0}, + {""}, + {"WIDTH", tok_width, 0}, + {"escseq", tok_escseq, 1}, + {"include", tok_include, 1}, + {""}, {""}, + {"mb_cur_min", tok_mb_cur_min, 1}, + {"escape_char", tok_escape_char, 1}, + {"comment_char", tok_comment_char, 1}, + {"code_set_name", tok_code_set_name, 1}, + {"WIDTH_VARIABLE", tok_width_variable, 0}, + {"g1esc", tok_g1esc, 1}, + {"addset", 
tok_addset, 1}, + {"CHARMAP", tok_charmap, 0}, + {"WIDTH_DEFAULT", tok_width_default, 0}, + {""}, + {"g2esc", tok_g2esc, 1}, + {""}, {""}, {""}, {""}, + {"g3esc", tok_g3esc, 1}, + {""}, {""}, {""}, {""}, + {"g0esc", tok_g0esc, 1}, + {""}, {""}, {""}, {""}, + {"mb_cur_max", tok_mb_cur_max, 1} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) @@ -109,7 +131,7 @@ charmap_hash (register const char *str, register int len) { register const char *s = wordlist[key].name; - if (*s == *str && !strncmp (str + 1, s + 1, len - 1)) + if (*str == *s && !strncmp (str + 1, s + 1, len - 1)) return &wordlist[key]; } } diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c index fd9cc35..6db2b42 100644 --- a/locale/programs/charmap.c +++ b/locale/programs/charmap.c @@ -1,6 +1,6 @@ -/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -25,6 +25,7 @@ #include <dirent.h> #include <errno.h> #include <libintl.h> +#include <limits.h> #include <obstack.h> #include <stdlib.h> #include <string.h> @@ -32,13 +33,10 @@ #include "error.h" #include "linereader.h" -#include "charset.h" +#include "charmap.h" #include "locfile.h" #include "repertoire.h" - -/* Uncomment following line for production version. */ -/* define NDEBUG 1 */ #include <assert.h> @@ -49,17 +47,20 @@ extern void *xmalloc (size_t __n); /* Prototypes for local functions. 
*/ -static struct charset_t *parse_charmap (const char *filename); -static void new_width (struct linereader *cmfile, struct charset_t *result, +static struct charmap_t *parse_charmap (const char *filename); +static void new_width (struct linereader *cmfile, struct charmap_t *result, const char *from, const char *to, unsigned long int width); +static void charmap_new_char (struct linereader *lr, struct charmap_t *cm, + int nbytes, char *bytes, const char *from, + const char *to, int decimal_ellipsis); -struct charset_t * +struct charmap_t * charmap_read (const char *filename) { const char *pathnfile; - struct charset_t *result = NULL; + struct charmap_t *result = NULL; if (filename != NULL) { @@ -175,16 +176,17 @@ charmap_read (const char *filename) } -static struct charset_t * +static struct charmap_t * parse_charmap (const char *filename) { struct linereader *cmfile; - struct charset_t *result; + struct charmap_t *result; int state; enum token_t expected_tok = tok_error; const char *expected_str = NULL; char *from_name = NULL; char *to_name = NULL; + enum token_t ellipsis = 0; /* Determine path. */ cmfile = lr_open (filename, charmap_hash); @@ -206,9 +208,12 @@ parse_charmap (const char *filename) return NULL; } + /* We don't want symbolic names in string to be translated. */ + cmfile->translate_strings = 0; + /* Allocate room for result. */ - result = (struct charset_t *) xmalloc (sizeof (struct charset_t)); - memset (result, '\0', sizeof (struct charset_t)); + result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t)); + memset (result, '\0', sizeof (struct charmap_t)); /* The default DEFAULT_WIDTH is 1. 
*/ result->width_default = 1; @@ -216,7 +221,8 @@ parse_charmap (const char *filename) #define obstack_chunk_free free obstack_init (&result->mem_pool); - if (init_hash (&result->char_table, 256)) + if (init_hash (&result->char_table, 256) + || init_hash (&result->byte_table, 256)) { free (result); return NULL; @@ -228,7 +234,7 @@ parse_charmap (const char *filename) while (1) { /* What's on? */ - struct token *now = lr_token (cmfile, NULL); + struct token *now = lr_token (cmfile, NULL, NULL); enum token_t nowtok = now->tok; struct token *arg; @@ -275,22 +281,24 @@ parse_charmap (const char *filename) && nowtok != tok_mb_cur_min && nowtok != tok_escape_char && nowtok != tok_comment_char && nowtok != tok_g0esc && nowtok != tok_g1esc && nowtok != tok_g2esc - && nowtok != tok_g3esc) + && nowtok != tok_g3esc && nowtok != tok_repertoiremap + && nowtok != tok_include) { lr_error (cmfile, _("syntax error in prolog: %s"), - _("illegal definition")); + _("invalid definition")); lr_ignore_rest (cmfile, 0); continue; } /* We know that we need an argument. 
*/ - arg = lr_token (cmfile, NULL); + arg = lr_token (cmfile, NULL, NULL); switch (nowtok) { case tok_code_set_name: - if (arg->tok != tok_ident && arg->tok != tok_string) + case tok_repertoiremap: + if (arg->tok != tok_ident) { badarg: lr_error (cmfile, _("syntax error in prolog: %s"), @@ -300,9 +308,14 @@ parse_charmap (const char *filename) continue; } - result->code_set_name = obstack_copy0 (&result->mem_pool, - arg->val.str.start, - arg->val.str.len); + if (nowtok == tok_code_set_name) + result->code_set_name = obstack_copy0 (&result->mem_pool, + arg->val.str.startmb, + arg->val.str.lenmb); + else + result->repertoiremap = obstack_copy0 (&result->mem_pool, + arg->val.str.startmb, + arg->val.str.lenmb); lr_ignore_rest (cmfile, 1); continue; @@ -312,12 +325,21 @@ parse_charmap (const char *filename) if (arg->tok != tok_number) goto badarg; - if (arg->val.num < 1 || arg->val.num > 4) + if (verbose + && ((nowtok == tok_mb_cur_max + && result->mb_cur_max != 0) + || (nowtok == tok_mb_cur_max + && result->mb_cur_max != 0))) + lr_error (cmfile, _("duplicate definition of <%s>"), + nowtok == tok_mb_cur_min + ? "mb_cur_min" : "mb_cur_max"); + + if (arg->val.num < 1) { lr_error (cmfile, - _("value for <%s> must lie between 1 and 4"), - nowtok == tok_mb_cur_min ? "mb_cur_min" - : "mb_cur_max"); + _("value for <%s> must be 1 or greater"), + nowtok == tok_mb_cur_min + ? 
"mb_cur_min" : "mb_cur_max"); lr_ignore_rest (cmfile, 0); continue; @@ -328,7 +350,8 @@ parse_charmap (const char *filename) && (int) arg->val.num > result->mb_cur_max)) { lr_error (cmfile, _("\ -value of <mb_cur_max> must be greater than the value of <mb_cur_min>")); +value of <%s> must be greater or equal than the value of <%s>"), + "mb_cur_max", "mb_cur_min"); lr_ignore_rest (cmfile, 0); continue; @@ -347,7 +370,7 @@ value of <mb_cur_max> must be greater than the value of <mb_cur_min>")); if (arg->tok != tok_ident) goto badarg; - if (arg->val.str.len != 1) + if (arg->val.str.lenmb != 1) { lr_error (cmfile, _("\ argument to <%s> must be a single character"), @@ -359,9 +382,9 @@ argument to <%s> must be a single character"), } if (nowtok == tok_escape_char) - cmfile->escape_char = *arg->val.str.start; + cmfile->escape_char = *arg->val.str.startmb; else - cmfile->comment_char = *arg->val.str.start; + cmfile->comment_char = *arg->val.str.startmb; lr_ignore_rest (cmfile, 1); continue; @@ -370,9 +393,15 @@ argument to <%s> must be a single character"), case tok_g1esc: case tok_g2esc: case tok_g3esc: + case tok_escseq: lr_ignore_rest (cmfile, 0); /* XXX */ continue; + case tok_include: + lr_error (cmfile, _("\ +character sets with locking states are not supported")); + exit (4); + default: /* Cannot happen. */ assert (! "Should not happen"); @@ -409,8 +438,8 @@ argument to <%s> must be a single character"), obstack_free (&result->mem_pool, from_name); from_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); to_name = NULL; state = 3; @@ -419,19 +448,20 @@ argument to <%s> must be a single character"), case 3: /* We have two possibilities: We can see an ellipsis or an encoding value. 
*/ - if (nowtok == tok_ellipsis) + if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 + || nowtok == tok_ellipsis2) { + ellipsis = nowtok; state = 4; continue; } /* FALLTHROUGH */ case 5: - if (nowtok != tok_charcode && nowtok != tok_ucs2 - && nowtok != tok_ucs4) + if (nowtok != tok_charcode) { lr_error (cmfile, _("syntax error in %s definition: %s"), - "CHARMAP", _("illegal encoding given")); + "CHARMAP", _("invalid encoding given")); lr_ignore_rest (cmfile, 0); @@ -444,9 +474,9 @@ argument to <%s> must be a single character"), else if (now->val.charcode.nbytes > result->mb_cur_max) lr_error (cmfile, _("too many bytes in character encoding")); else - charset_new_char (cmfile, &result->char_table, - now->val.charcode.nbytes, - now->val.charcode.val, from_name, to_name); + charmap_new_char (cmfile, result, now->val.charcode.nbytes, + now->val.charcode.bytes, from_name, to_name, + ellipsis != tok_ellipsis2); /* Ignore trailing comment silently. */ lr_ignore_rest (cmfile, 0); @@ -470,8 +500,8 @@ argument to <%s> must be a single character"), /* Copy the to-name in a safe place. 
*/ to_name = (char *) obstack_copy0 (&result->mem_pool, - cmfile->token.val.str.start, - cmfile->token.val.str.len); + cmfile->token.val.str.startmb, + cmfile->token.val.str.lenmb); state = 5; continue; @@ -557,15 +587,15 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); obstack_free (&result->mem_pool, from_name); from_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); to_name = NULL; state = 94; continue; case 94: - if (nowtok == tok_ellipsis) + if (nowtok == tok_ellipsis3) { state = 95; continue; @@ -602,8 +632,8 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); } to_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); state = 96; continue; @@ -637,15 +667,15 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); obstack_free (&result->mem_pool, from_name); from_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); to_name = NULL; state = 99; continue; case 99: - if (nowtok == tok_ellipsis) + if (nowtok == tok_ellipsis3) state = 100; /* Store info. */ @@ -663,8 +693,8 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); else { to_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); /* XXX Enter value into table. 
*/ } @@ -690,13 +720,14 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); static void -new_width (struct linereader *cmfile, struct charset_t *result, +new_width (struct linereader *cmfile, struct charmap_t *result, const char *from, const char *to, unsigned long int width) { - unsigned int from_val, to_val; + struct charseq *from_val; + struct charseq *to_val; - from_val = charset_find_value (&result->char_table, from, strlen (from)); - if ((wchar_t) from_val == ILLEGAL_CHAR_VALUE) + from_val = charmap_find_value (result, from, strlen (from)); + if (from_val == NULL) { lr_error (cmfile, _("unknown character `%s'"), from); return; @@ -706,8 +737,8 @@ new_width (struct linereader *cmfile, struct charset_t *result, to_val = from_val; else { - to_val = charset_find_value (&result->char_table, to, strlen (to)); - if ((wchar_t) to_val == ILLEGAL_CHAR_VALUE) + to_val = charmap_find_value (result, to, strlen (to)); + if (to_val == NULL) { lr_error (cmfile, _("unknown character `%s'"), to); return; @@ -734,3 +765,140 @@ new_width (struct linereader *cmfile, struct charset_t *result, result->width_rules[result->nwidth_rules].width = (unsigned int) width; ++result->nwidth_rules; } + + +struct charseq * +charmap_find_value (const struct charmap_t *cm, const char *name, size_t len) +{ + void *result; + + return (find_entry ((hash_table *) &cm->char_table, name, len, &result) + < 0 ? 
NULL : (struct charseq *) result); +} + + +static void +charmap_new_char (struct linereader *lr, struct charmap_t *cm, + int nbytes, char *bytes, const char *from, const char *to, + int decimal_ellipsis) +{ + hash_table *ht = &cm->char_table; + hash_table *bt = &cm->byte_table; + struct obstack *ob = &cm->mem_pool; + char *from_end; + char *to_end; + const char *cp; + int prefix_len, len1, len2; + unsigned int from_nr, to_nr, cnt; + struct charseq *newp; + + len1 = strlen (from); + + if (to == NULL) + { + newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); + newp->nbytes = nbytes; + memcpy (newp->bytes, bytes, nbytes); + newp->name = obstack_copy (ob, from, len1 + 1); + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + + insert_entry (ht, from, len1, newp); + insert_entry (bt, newp->bytes, nbytes, newp); + /* Please note that it isn't a bug if a symbol is defined more + than once. All later definitions are simply discarded. */ + return; + } + + /* We have a range: the names must have names with equal prefixes + and an equal number of digits, where the second number is greater + or equal than the first. */ + len2 = strlen (to); + + if (len1 != len2) + { + illegal_range: + lr_error (lr, _("invalid names for character range")); + return; + } + + cp = &from[len1 - 1]; + if (decimal_ellipsis) + while (isdigit (*cp) && cp >= from) + --cp; + else + while (isxdigit (*cp) && cp >= from) + { + if (!isdigit (*cp) && !isupper (*cp)) + lr_error (lr, _("\ +hexadecimal range format should use only capital characters")); + --cp; + } + + prefix_len = (cp - from) + 1; + + if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) + goto illegal_range; + + errno = 0; + from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16); + if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE) + || ((to_nr = strtoul (&to[prefix_len], &to_end, + decimal_ellipsis ? 
10 : 16)) == ULONG_MAX + && errno == ERANGE) + || *to_end != '\0') + { + lr_error (lr, _("<%s> and <%s> are illegal names for range")); + return; + } + + if (from_nr > to_nr) + { + lr_error (lr, _("upper limit in range is not higher then lower limit")); + return; + } + + for (cnt = from_nr; cnt <= to_nr; ++cnt) + { + char *name_end; + obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X", + prefix_len, from, len1 - prefix_len, cnt); + name_end = obstack_finish (ob); + + newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); + newp->nbytes = nbytes; + memcpy (newp->bytes, bytes, nbytes); + newp->name = name_end; + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + + insert_entry (ht, name_end, len1, newp); + insert_entry (bt, newp->bytes, nbytes, newp); + /* Please note we don't examine the return value since it is no error + if we have two definitions for a symbol. */ + + /* Increment the value in the byte sequence. */ + if (++bytes[nbytes - 1] == '\0') + { + int b = nbytes - 2; + + do + if (b < 0) + { + lr_error (lr, + _("resulting bytes for range not representable.")); + return; + } + while (++bytes[b--] == 0); + } + } +} + + +struct charseq * +charmap_find_symbol (const struct charmap_t *cm, const char *bytes, + size_t nbytes) +{ + void *result; + + return (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &result) + < 0 ? NULL : (struct charseq *) result); +} diff --git a/locale/programs/charset.h b/locale/programs/charmap.h index 8f066b1..88fd078 100644 --- a/locale/programs/charset.h +++ b/locale/programs/charmap.h @@ -1,6 +1,6 @@ -/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -17,27 +17,27 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#ifndef _CHARSET_H -#define _CHARSET_H +#ifndef _CHARMAP_H +#define _CHARMAP_H #include <obstack.h> #include "repertoire.h" #include "simple-hash.h" -#include "linereader.h" struct width_rule { - unsigned int from; - unsigned int to; + struct charseq *from; + struct charseq *to; unsigned int width; }; -struct charset_t +struct charmap_t { const char *code_set_name; + const char *repertoiremap; int mb_cur_min; int mb_cur_max; @@ -48,27 +48,31 @@ struct charset_t struct obstack mem_pool; hash_table char_table; + hash_table byte_table; + hash_table ucs4_table; }; -/* We need one value to mark the error case. Let's use 0xffffffff. - I.e., it is placed in the last page of ISO 10646. For now only the - first is used and we have plenty of room. */ -#define ILLEGAL_CHAR_VALUE ((wchar_t) 0xffffffffu) - +/* This is the structure used for entries in the hash table. It represents + the sequence of bytes used for the coded character. */ +struct charseq +{ + const char *name; + uint32_t ucs4; + int nbytes; + unsigned char bytes[0]; +}; -/* Declared in localedef.c. */ -extern int be_quiet; /* Prototypes for charmap handling functions. */ -struct charset_t *charmap_read (const char *filename); - -/* Prototypes for function to insert new character. */ -void charset_new_char (struct linereader *lr, hash_table *ht, int bytes, - unsigned int value, const char *from, const char *to); +extern struct charmap_t *charmap_read (const char *filename); /* Return the value stored under the given key in the hashing table. 
*/ -unsigned int charset_find_value (const hash_table *ht, - const char *name, size_t len); +extern struct charseq *charmap_find_value (const struct charmap_t *charmap, + const char *name, size_t len); + +/* Return symbol for given multibyte sequence. */ +extern struct charseq *charmap_find_symbol (const struct charmap_t *charmap, + const char *name, size_t len); -#endif /* charset.h */ +#endif /* charmap.h */ diff --git a/locale/programs/config.h b/locale/programs/config.h index 9775572..a293da3 100644 --- a/locale/programs/config.h +++ b/locale/programs/config.h @@ -1,5 +1,25 @@ +/* Configuration for localedef program. + Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + #ifndef _LD_CONFIG_H -#define _LD_CONFIG_H +#define _LD_CONFIG_H 1 /* Use the internal textdomain used for libc messages. */ #define PACKAGE _libc_intl_domainname @@ -18,12 +38,8 @@ # endif #endif - - -#define HAVE_VPRINTF 1 -#define HAVE_STRING_H 1 - +/* This must be one higer than the last used LC_xxx category value. 
*/ +#define __LC_LAST 13 #include_next <config.h> - #endif diff --git a/locale/programs/ld-address.c b/locale/programs/ld-address.c new file mode 100644 index 0000000..805330c --- /dev/null +++ b/locale/programs/ld-address.c @@ -0,0 +1,514 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <byteswap.h> +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localeinfo.h" +#include "locfile.h" + + +static struct +{ + const char ab2[2]; + const char ab3[3]; + uint32_t num; +} iso3166[] = +{ +#define DEFINE_COUNTRY_CODE(Name, Ab2, Ab3, Num) \ + { #Ab2, #Ab3, Num }, +#include "iso-3166.def" +}; + + +static struct +{ + const char ab[2]; + const char term[3]; + const char lib[3]; +} iso639[] = +{ +#define DEFINE_LANGUAGE_CODE(Name, Ab, Term, Lib) \ + { #Ab, #Term, #Lib }, +#include "iso-639.def" +}; + + +/* The real definition of the struct for the LC_ADDRESS locale. 
*/ +struct locale_address_t +{ + const char *postal_fmt; + const char *country_name; + const char *country_post; + const char *country_ab2; + const char *country_ab3; + uint32_t country_num; + uint32_t country_num_ob; + const char *country_car; + const char *country_isbn; + const char *lang_name; + const char *lang_ab; + const char *lang_term; + const char *lang_lib; +}; + + +static void +address_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_ADDRESS].address = + (struct locale_address_t *) xcalloc (1, + sizeof (struct locale_address_t)); + + lr->translate_strings = 1; + lr->return_widestr = 0; +} + + +void +address_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ + struct locale_address_t *address = locale->categories[LC_ADDRESS].address; + size_t cnt; + int helper; + + if (address->postal_fmt == NULL) + { + error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "postal_fmt"); + /* Use as the default value the value of the i18n locale. */ + address->postal_fmt = "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"; + } + else + { + /* We must check whether the format string contains only the + allowed escape sequences. */ + const char *cp = address->postal_fmt; + + if (*cp == '\0') + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "postal_fmt"); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (*++cp == 'R') + /* Romanize-flag. */ + ++cp; + if (strchr ("afdbshNtreCzTc%", *cp) == NULL) + { + error (0, 0, _("\ +%s: invalid escape `%%%c' sequence in field `%s'"), + "LC_ADDRESS", *cp, "postal_fmt"); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (address->cat == NULL) \ + { \ + if (verbose) \ + error (0, 0, _("%s: field `%s' not defined"), "LC_ADDRESS", #cat); \ + address->cat = ""; \ + } + + TEST_ELEM (country_name); + /* XXX Test against list of defined codes. 
*/ + TEST_ELEM (country_post); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_car); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_isbn); + TEST_ELEM (lang_name); + + helper = 1; + if (address->lang_term == NULL) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), "LC_ADDRESS", + "lang_term"); + address->lang_term = ""; + cnt = sizeof (iso639) / sizeof (iso639[0]); + } + else if (address->lang_term[0] == '\0') + { + if (verbose) + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_term"); + cnt = sizeof (iso639) / sizeof (iso639[0]); + } + else + { + /* Look for this language in the table. */ + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_term, iso639[cnt].term) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + error (0, 0, _("\ +%s: terminology language code `%s' not defined"), + "LC_ADDRESS", address->lang_term); + } + + if (address->lang_ab == NULL) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), "LC_ADDRESS", "lang_ab"); + address->lang_ab = ""; + } + else if (address->lang_ab[0] == '\0') + { + if (verbose) + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_ab"); + } + else + { + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + { + helper = 2; + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_ab, iso639[cnt].ab) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + error (0, 0, _("\ +%s: language abbreviation `%s' not defined"), + "LC_ADDRESS", address->lang_ab); + } + else + if (strcmp (iso639[cnt].ab, address->lang_ab) != 0) + error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "lang_ab", "lang_term"); + } + + if (address->lang_lib == NULL) + /* This is no error. 
*/ + address->lang_lib = address->lang_term; + else if (address->lang_lib[0] == '\0') + { + if (verbose) + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_lib"); + } + else + { + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + { + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_lib, iso639[cnt].lib) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + error (0, 0, _("\ +%s: language abbreviation `%s' not defined"), + "LC_ADDRESS", address->lang_lib); + } + else + if (strcmp (iso639[cnt].ab, address->lang_ab) != 0) + error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), "LC_ADDRESS", "lang_lib", + helper == 1 ? "lang_term" : "lang_ab"); + } + + if (address->country_num == 0) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_num"); + cnt = sizeof (iso3166) / sizeof (iso3166[0]); + } + else + { + for (cnt = 0; cnt < sizeof (iso3166) / sizeof (iso3166[0]); ++cnt) + if (address->country_num == iso3166[cnt].num) + break; + + if (cnt == sizeof (iso3166) / sizeof (iso3166[0])) + error (0, 0, _("\ +%s: numeric country code `%d' not valid"), + "LC_ADDRESS", address->country_num); + } + address->country_num_ob = bswap_32 (address->country_num); + + if (address->country_ab2 == NULL) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_ab2"); + address->country_ab2 = " "; + } + else if (cnt != sizeof (iso3166) / sizeof (iso3166[0]) + && strcmp (address->country_ab2, iso3166[cnt].ab2) != 0) + error (0, 0, _("%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "country_ab2", "country_num"); + + if (address->country_ab3 == NULL) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_ab3"); + address->country_ab3 = " "; + } + else if (cnt != sizeof (iso3166) / sizeof (iso3166[0]) + && strcmp (address->country_ab3, iso3166[cnt].ab3) != 0) + error (0, 0, _("%s: 
`%s' value does not match `%s' value"), + "LC_ADDRESS", "country_ab3", "country_num"); +} + + +void +address_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ + struct locale_address_t *address = locale->categories[LC_ADDRESS].address; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS)]; + struct locale_file data; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS)]; + size_t cnt = 0; + + data.magic = LIMAGIC (LC_ADDRESS); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = (void *) address->postal_fmt; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_name; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_post; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_ab2; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_ab3; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_car; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define country_num_eb country_num_ob +# define country_num_el country_num +#else +# define country_num_eb country_num +# define country_num_el country_num_ob +#endif + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = 
(void *) address->country_num_eb; + iov[cnt].iov_len = sizeof (uint32_t); + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_num_el; + iov[cnt].iov_len = sizeof (uint32_t); + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_isbn; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->lang_name; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->lang_ab; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->lang_term; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->lang_lib; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + assert (cnt == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS)); + + write_locale_data (output_path, "LC_ADDRESS", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS), iov); +} + + +/* The parser for the LC_ADDRESS section of the locale definition. */ +void +address_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_address_t *address; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_ADDRESS' must be free. 
*/ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_address, LC_ADDRESS, + "LC_ADDRESS", ignore_content); + return; + } + + /* Prepare the data structures. */ + address_startup (ldfile, result, ignore_content); + address = result->categories[LC_ADDRESS].address; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (address->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_ADDRESS", #cat); \ + address->cat = ""; \ + } \ + else if (!ignore_content) \ + address->cat = arg->val.str.startmb; \ + break + + STR_ELEM (postal_fmt); + STR_ELEM (country_name); + STR_ELEM (country_post); + STR_ELEM (country_ab2); + STR_ELEM (country_ab3); + STR_ELEM (country_car); + STR_ELEM (country_isbn); + STR_ELEM (lang_name); + STR_ELEM (lang_ab); + STR_ELEM (lang_term); + STR_ELEM (lang_lib); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_number) \ + goto err_label; \ + else if (address->cat != 0) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \ + else if (!ignore_content) \ + address->cat = arg->val.num; \ + break + + INT_ELEM (country_num); + + case tok_end: + /* Next we assume `LC_ADDRESS'. 
*/ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_ADDRESS"); + else if (arg->tok != tok_lc_address) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_ADDRESS"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_address); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_ADDRESS"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_ADDRESS"); +} diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c index 265bfd0..3c12674 100644 --- a/locale/programs/ld-collate.c +++ b/locale/programs/ld-collate.c @@ -1,6 +1,6 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -21,32 +21,1034 @@ # include <config.h> #endif -#include <endian.h> -#include <errno.h> -#include <limits.h> -#include <locale.h> -#include <obstack.h> +#include <error.h> #include <stdlib.h> -#include <string.h> -#include <wchar.h> -#include <libintl.h> +#include "charmap.h" #include "localeinfo.h" -#include "locales.h" -#include "simple-hash.h" -#include "stringtrans.h" -#include "strlen-hash.h" +#include "linereader.h" +#include "locfile.h" +#include "localedef.h" /* Uncomment the following line in the production version. */ /* #define NDEBUG 1 */ #include <assert.h> +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + +/* Forward declaration. */ +struct element_t; + +/* Data type for list of strings. 
*/ +struct section_list +{ + struct section_list *next; + /* Name of the section. */ + const char *name; + /* First element of this section. */ + struct element_t *first; + /* Last element of this section. */ + struct element_t *last; + /* These are the rules for this section. */ + enum coll_sort_rule *rules; +}; + +/* Data type for collating element. */ +struct element_t +{ + const char *mbs; + const uint32_t *wcs; + int order; + + struct element_t **weights; + + /* Where does the definition come from. */ + const char *file; + size_t line; + + /* Which section does this belong to. */ + struct section_list *section; + + /* Predecessor and successor in the order list. */ + struct element_t *last; + struct element_t *next; +}; + +/* Data type for collating symbol. */ +struct symbol_t +{ + /* Point to place in the order list. */ + struct element_t *order; + + /* Where does the definition come from. */ + const char *file; + size_t line; +}; + + +/* The real definition of the struct for the LC_COLLATE locale. */ +struct locale_collate_t +{ + int col_weight_max; + int cur_weight_max; + + /* List of known scripts. */ + struct section_list *sections; + /* Current section using definition. */ + struct section_list *current_section; + /* There always can be an unnamed section. */ + struct section_list unnamed_section; + /* To make handling of errors easier we have another section. */ + struct section_list error_section; + + /* Number of sorting rules given in order_start line. */ + uint32_t nrules; + + /* Start of the order list. */ + struct element_t *start; + + /* The undefined element. */ + struct element_t undefined; -#define MAX(a, b) ((a) > (b) ? (a) : (b)) + /* This is the cursor for `reorder_after' insertions. */ + struct element_t *cursor; -#define SWAPU32(w) \ - (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24)) + /* Remember whether last weight was an ellipsis. */ + int was_ellipsis; + + /* Known collating elements. 
*/ + hash_table elem_table; + + /* Known collating symbols. */ + hash_table sym_table; + + /* Known collation sequences. */ + hash_table seq_table; + + struct obstack mempool; + + /* The LC_COLLATE category is a bit special as it is sometimes possible + that the definitions from more than one input file contains information. + Therefore we keep all relevant input in a list. */ + struct locale_collate_t *next; +}; + + +/* We have a few global variables which are used for reading all + LC_COLLATE category descriptions in all files. */ +static int nrules; + + +static struct section_list * +make_seclist_elem (struct locale_collate_t *collate, const char *string, + struct section_list *next) +{ + struct section_list *newp; + + newp = (struct section_list *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->next = next; + newp->name = string; + newp->first = NULL; + + return newp; +} + + +static struct element_t * +new_element (struct locale_collate_t *collate, const char *mbs, + const uint32_t *wcs) +{ + struct element_t *newp; + + newp = (struct element_t *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->mbs = mbs; + newp->wcs = wcs; + newp->order = 0; + + newp->file = NULL; + newp->line = 0; + + newp->section = NULL; + + newp->last = NULL; + newp->next = NULL; + + return newp; +} + + +static struct symbol_t * +new_symbol (struct locale_collate_t *collate) +{ + struct symbol_t *newp; + newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp)); + + newp->order = NULL; + + newp->file = NULL; + newp->line = 0; + + return newp; +} + + +/* Test whether this name is already defined somewhere. 
*/ +static int +check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate, + struct charmap_t *charmap, struct repertoire_t *repertoire, + const char *symbol, size_t symbol_len) +{ + void *ignore = NULL; + + if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined in charmap"), symbol); + return 1; + } + + if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined in repertoire"), symbol); + return 1; + } + + if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol); + return 1; + } + + if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined as collating element"), + symbol); + return 1; + } + + return 0; +} + + +/* Read the direction specification. */ +static void +read_directions (struct linereader *ldfile, struct token *arg, + struct charmap_t *charmap, struct repertoire_t *repertoire, + struct locale_collate_t *collate) +{ + int cnt = 0; + int max = nrules ?: 10; + enum coll_sort_rule *rules = calloc (max, sizeof (*rules)); + int warned = 0; + + while (1) + { + int valid = 0; + + if (arg->tok == tok_forward) + { + if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d"), + "LC_COLLATE", "forward", cnt + 1); + } + } + else + rules[cnt] |= sort_forward; + + valid = 1; + } + else if (arg->tok == tok_backward) + { + if (rules[cnt] & sort_forward) + { + if (! 
warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d"), + "LC_COLLATE", "backward", cnt + 1); + } + } + else + rules[cnt] |= sort_backward; + + valid = 1; + } + else if (arg->tok == tok_position) + { + if (rules[cnt] & sort_position) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d in category `%s'"), + "LC_COLLATE", "position", cnt + 1); + } + } + else + rules[cnt] |= sort_position; + + valid = 1; + } + + if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma + || arg->tok == tok_semicolon) + { + if (! valid && ! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + + /* See whether we have to increment the counter. */ + if (arg->tok != tok_comma && rules[cnt] != 0) + ++cnt; + + if (arg->tok == tok_eof || arg->tok == tok_eol) + /* End of line or file, so we exit the loop. */ + break; + + if (nrules == 0) + { + /* See whether we have enough room in the array. */ + if (cnt == max) + { + max += 10; + rules = (enum coll_sort_rule *) xrealloc (rules, + max + * sizeof (*rules)); + memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules)); + } + } + else + { + if (cnt == nrules) + { + /* There must not be any more rule. */ + if (! warned) + { + lr_error (ldfile, _("\ +%s: too many rules; first entry only had %d"), + "LC_COLLATE", nrules); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + break; + } + } + } + else + { + if (! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + } + + arg = lr_token (ldfile, charmap, repertoire); + } + + if (nrules == 0) + { + /* Now we know how many rules we have. 
*/ + nrules = cnt; + rules = (enum coll_sort_rule *) xrealloc (rules, + nrules * sizeof (*rules)); + } + else + { + if (cnt < nrules) + { + /* Not enough rules in this specification. */ + if (! warned) + lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE"); + + do + rules[cnt] = sort_forward; + while (++cnt < nrules); + } + } + + collate->current_section->rules = rules; +} + + +static void +insert_value (struct linereader *ldfile, struct token *arg, + struct charmap_t *charmap, struct repertoire_t *repertoire, + struct locale_collate_t *collate) +{ + /* First find out what kind of symbol this is. */ + struct charseq *seq; + uint32_t wc; + struct element_t *elem = NULL; + int weight_cnt; + + /* First determine the wide character. There must be such a value, + otherwise we ignore it (if it is no collatio symbol or element). */ + wc = repertoire_find_value (repertoire, arg->val.str.startmb, + arg->val.str.lenmb); + + /* Try to find the character in the charmap. */ + seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb); + + if (wc == ILLEGAL_CHAR_VALUE) + { + /* It's no character, so look through the collation elements and + symbol list. */ + void *result; + + if (find_entry (&collate->sym_table, arg->val.str.startmb, + arg->val.str.lenmb, &result) == 0) + { + /* It's a collation symbol. */ + struct symbol_t *sym = (struct symbol_t *) result; + elem = sym->order; + } + else if (find_entry (&collate->elem_table, arg->val.str.startmb, + arg->val.str.lenmb, &result) != 0) + /* It's also no collation element. Therefore ignore it. */ + return; + } + + /* XXX elem must be defined. */ + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL) + { + lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"), + arg->val.str.startmb, arg->val.str.lenmb, + elem->file, elem->line); + return; + } + + /* Initialize all the fields. 
*/ + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + elem->last = collate->cursor; + elem->next = collate->cursor ? collate->cursor->next : NULL; + elem->weights = (struct element_t **) + obstack_alloc (&collate->mempool, nrules * sizeof (struct element_t *)); + memset (elem->weights, '\0', nrules * sizeof (struct element_t *)); + + if (collate->current_section->first == NULL) + collate->current_section->first = elem; + if (collate->current_section->last == collate->cursor) + collate->current_section->last = elem; + + collate->cursor = elem; + + /* Now read the rest of the line. */ + ldfile->return_widestr = 1; + + weight_cnt = 0; + do + { + arg = lr_token (ldfile, charmap, repertoire); + + if (arg->tok == tok_eof || arg->tok == tok_eol) + { + /* This means the rest of the line uses the current element + as the weight. */ + do + elem->weights[weight_cnt] = elem; + while (++weight_cnt < nrules); + + return; + } + + if (arg->tok == tok_ignore) + { + /* The weight for this level has to be ignored. We use the + null pointer to indicate this. */ + } + else if (arg->tok == tok_bsymbol) + { + + } + } + while (++weight_cnt < nrules); + + lr_ignore_rest (ldfile, weight_cnt == nrules); +} + + +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + { + struct locale_collate_t *collate; + + collate = locale->categories[LC_COLLATE].collate = + (struct locale_collate_t *) xcalloc (1, + sizeof (struct locale_collate_t)); + + /* Init the various data structures. 
*/ + init_hash (&collate->elem_table, 100); + init_hash (&collate->sym_table, 100); + init_hash (&collate->seq_table, 500); + obstack_init (&collate->mempool); + + collate->col_weight_max = -1; + } + + ldfile->translate_strings = 1; + ldfile->return_widestr = 0; +} + + +void +collate_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ +} + + +void +collate_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ +} + + +void +collate_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_collate_t *collate; + struct token *now; + struct token *arg; + enum token_t nowtok; + int state = 0; + int was_ellipsis = 0; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_COLLATE' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + if (nowtok == tok_copy) + { + state = 2; + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_string) + goto err_label; + /* XXX Use the name */ + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* Prepare the data structures. */ + collate_startup (ldfile, result, ignore_content); + collate = result->categories[LC_COLLATE].collate; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_coll_weight_max: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok != tok_number) + goto err_label; + if (collate->col_weight_max != -1) + lr_error (ldfile, _("%s: duplicate definition of `%s'"), + "LC_COLLATE", "col_weight_max"); + else + collate->col_weight_max = arg->val.num; + lr_ignore_rest (ldfile, 1); + break; + + case tok_section_symbol: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else if (!ignore_content) + { + /* Check whether this section is already known. */ + struct section_list *known = collate->sections; + while (known != NULL) + if (strcmp (known->name, arg->val.str.startmb) == 0) + break; + + if (known != NULL) + { + lr_error (ldfile, + _("%s: duplicate declaration of section `%s'"), + "LC_COLLATE", arg->val.str.startmb); + free (arg->val.str.startmb); + } + else + collate->sections = make_seclist_elem (collate, + arg->val.str.startmb, + collate->sections); + + lr_ignore_rest (ldfile, known == NULL); + } + else + { + free (arg->val.str.startmb); + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_collating_element: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + + /* Next the `from' keyword. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_from) + { + free ((char *) symbol); + goto err_label; + } + + ldfile->return_widestr = 1; + + /* Finally the string with the replacement. 
*/ + arg = lr_token (ldfile, charmap, repertoire); + ldfile->return_widestr = 0; + if (arg->tok != tok_string) + goto err_label; + + if (!ignore_content) + { + if (symbol == NULL) + lr_error (ldfile, _("\ +%s: unknown character in collating element name"), + "LC_COLLATE"); + if (arg->val.str.startmb == NULL) + lr_error (ldfile, _("\ +%s: unknown character in collating element definition"), + "LC_COLLATE"); + if (arg->val.str.startwc == NULL) + lr_error (ldfile, _("\ +%s: unknown wide character in collating element definition"), + "LC_COLLATE"); + else if (arg->val.str.lenwc < 2) + lr_error (ldfile, _("\ +%s: substitution string in collating element definition must have at least two characters"), + "LC_COLLATE"); + + if (symbol != NULL) + { + /* The name is already defined. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_elem_free; + + if (insert_entry (&collate->elem_table, + symbol, symbol_len, + new_element (collate, + arg->val.str.startmb, + arg->val.str.startwc)) + < 0) + lr_error (ldfile, _("\ +error while adding collating element")); + } + else + goto col_elem_free; + } + else + { + col_elem_free: + if (symbol != NULL) + free ((char *) symbol); + if (arg->val.str.startmb != NULL) + free (arg->val.str.startmb); + if (arg->val.str.startwc != NULL) + free (arg->val.str.startwc); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_collating_symbol: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + + if (!ignore_content) + { + if (symbol == NULL) + lr_error (ldfile, _("\ +%s: unknown character in collating symbol name"), + "LC_COLLATE"); + else + { + /* The name is already defined. 
*/ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_sym_free; + + if (insert_entry (&collate->sym_table, + symbol, symbol_len, + new_symbol (collate)) < 0) + lr_error (ldfile, _("\ +error while adding collating symbol")); + } + } + else + { + col_sym_free: + if (symbol != NULL) + free ((char *) symbol); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_symbol_equivalence: + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *newname = arg->val.str.startmb; + size_t newname_len = arg->val.str.lenmb; + const char *symname; + size_t symname_len; + struct symbol_t *symval; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + { + if (newname != NULL) + free ((char *) newname); + goto err_label; + } + + symname = arg->val.str.startmb; + symname_len = arg->val.str.lenmb; + + if (!ignore_content) + { + if (newname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition name"), + "LC_COLLATE"); + goto sym_equiv_free; + } + if (symname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition value"), + "LC_COLLATE"); + goto sym_equiv_free; + } + /* The name is already defined. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symname, symname_len)) + goto col_sym_free; + + /* See whether the symbol name is already defined. 
*/ + if (find_entry (&collate->sym_table, symname, symname_len, + (void **) &symval) != 0) + { + lr_error (ldfile, _("\ +%s: unknown symbol `%s' in equivalent definition"), + "LC_COLLATE", symname); + goto col_sym_free; + } + + if (insert_entry (&collate->sym_table, + newname, newname_len, symval) < 0) + { + lr_error (ldfile, _("\ +error while adding equivalent collating symbol")); + goto sym_equiv_free; + } + + free ((char *) symname); + } + else + { + sym_equiv_free: + if (newname != NULL) + free ((char *) newname); + if (symname != NULL) + free ((char *) symname); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_order_start: + if (state != 0 && state != 1) + goto err_label; + state = 1; + + /* The 14652 draft does not specify whether all `order_start' lines + must contain the same number of sort-rules, but 14651 does. So + we require this here as well. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok == tok_bsymbol) + { + /* This better should be a section name. */ + struct section_list *sp = collate->sections; + while (sp != NULL + && strcmp (sp->name, arg->val.str.startmb) != 0) + sp = sp->next; + + if (sp == NULL) + { + lr_error (ldfile, _("\ +%s: unknown section name `%s'"), + "LC_COLLATE", arg->val.str.startmb); + /* We use the error section. */ + collate->current_section = &collate->error_section; + } + else + { + /* Remember this section. */ + collate->current_section = sp; + + /* One should not be allowed to open the same + section twice. */ + if (sp->first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for section `%s'"), + "LC_COLLATE", sp->name); + + /* Next should come the end of the line or a semicolon. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok == tok_eol) + { + uint32_t cnt; + + /* This means we have exactly one rule: `forward'. 
*/ + if (collate->nrules > 1) + lr_error (ldfile, _("\ +%s: invalid number of sorting rules"), + "LC_COLLATE"); + else + collate->nrules = 1; + sp->rules = obstack_alloc (&collate->mempool, + (sizeof (enum coll_sort_rule) + * collate->nrules)); + for (cnt = 0; cnt < collate->nrules; ++cnt) + sp->rules[cnt] = sort_forward; + + /* Next line. */ + break; + } + + /* Get the next token. */ + arg = lr_token (ldfile, charmap, repertoire); + } + } + else + { + /* There is no section symbol. Therefore we use the unnamed + section. */ + collate->current_section = &collate->unnamed_section; + + if (collate->unnamed_section.first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for unnamed section"), + "LC_COLLATE"); + } + + /* Now read the direction names. */ + read_directions (ldfile, arg, charmap, repertoire, collate); + break; + + case tok_order_end: + if (state != 1) + goto err_label; + state = 2; + lr_ignore_rest (ldfile, 1); + break; + + case tok_reorder_after: + if (state != 2 && state != 3) + goto err_label; + state = 3; + /* XXX get symbol */ + break; + + case tok_reorder_end: + if (state != 3) + goto err_label; + state = 4; + lr_ignore_rest (ldfile, 1); + break; + + case tok_bsymbol: + if (state != 1 && state != 3) + goto err_label; + + if (state == 3) + { + /* It is possible that we already have this collation sequence. + In this case we move the entry. */ + struct element_t *seqp; + + if (find_entry (&collate->seq_table, arg->val.str.startmb, + arg->val.str.lenmb, (void **) &seqp) == 0) + { + /* Remove the entry from the old position. */ + if (seqp->last == NULL) + collate->start = seqp->next; + else + seqp->last->next = seqp->next; + if (seqp->next != NULL) + seqp->next->last = seqp->last; + + /* We also have to check whether this entry is the + first or last of a section. */ + if (seqp->section->first == seqp) + { + if (seqp->section->first == seqp->section->last) + /* This setion has no content anymore. 
*/ + seqp->section->first = seqp->section->last = NULL; + else + seqp->section->first = seqp->next; + } + else if (seqp->section->last == seqp) + seqp->section->last = seqp->last; + + seqp->last = seqp->next = NULL; + } + } + + /* Now insert in the new place. */ + insert_value (ldfile, arg, charmap, repertoire, collate); + break; + + case tok_undefined: + if (state != 1) + goto err_label; + /* XXX handle UNDEFINED weight */ + break; + + case tok_ellipsis3: + if (state != 1 && state != 3) + goto err_label; + + was_ellipsis = 1; + /* XXX Read the remainder of the line and remember what are + the weights. */ + break; + + case tok_end: + /* Next we assume `LC_COLLATE'. */ + if (state == 0) + /* We must either see a copy statement or have ordering values. */ + lr_error (ldfile, _("%s: empty category description not allowed"), + "LC_COLLATE"); + else if (state == 1) + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + else if (state == 3) + error (0, 0, _("%s: missing `reorder-end' keyword"), + "LC_COLLATE"); + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE"); + else if (arg->tok != tok_lc_collate) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_collate); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); +} + + +#if 0 /* What kind of symbols get defined? 
*/ enum coll_symbol @@ -75,7 +1077,8 @@ typedef struct patch_t typedef struct element_t { - const wchar_t *name; + const char *namemb; + const uint32_t *namewc; unsigned int this_weight; struct element_t *next; @@ -95,12 +1098,12 @@ struct locale_collate_t hash_table elements; struct obstack element_mem; - /* The result table. */ - hash_table result; + /* The result tables. */ + hash_table resultmb; + hash_table resultwc; /* Sorting rules given in order_start line. */ - u_int32_t nrules; - u_int32_t nrules_max; + uint32_t nrules; enum coll_sort_rule *rules; /* Used while recognizing symbol composed of multiple tokens @@ -114,20 +1117,12 @@ struct locale_collate_t /* Was lastline ellipsis? */ int was_ellipsis; /* Value of last entry if was character. */ - wchar_t last_char; + uint32_t last_char; /* Current element. */ element_t *current_element; /* What kind of symbol is current element. */ enum coll_symbol kind; - /* While collecting the weights we need some temporary space. */ - unsigned int current_order; - int *weight_cnt; - unsigned int weight_idx; - unsigned int *weight; - size_t nweight; - size_t nweight_max; - /* Patch lists. */ patch_t *current_patch; patch_t *all_patches; @@ -135,6 +1130,10 @@ struct locale_collate_t /* Room for the UNDEFINED information. */ element_t undefined; unsigned int undefined_len; + + /* Script information. */ + const char **scripts; + unsigned int nscripts; }; @@ -142,25 +1141,22 @@ struct locale_collate_t extern int verbose; -void *xmalloc (size_t __n); -void *xrealloc (void *__p, size_t __n); - #define obstack_chunk_alloc malloc #define obstack_chunk_free free -void -collate_startup (struct linereader *lr, struct localedef_t *locale, - struct charset_t *charset) -{ - struct locale_collate_t *collate; +/* Prototypes for local functions. */ +static void collate_startup (struct linereader *ldfile, + struct localedef_t *locale, + struct charmap_t *charmap, int ignore_content); - /* We have a definition for LC_COLLATE. 
*/ - copy_posix.mask &= ~(1 << LC_COLLATE); - /* It is important that we always use UCS4 encoding for strings now. */ - encoding_method = ENC_UCS4; +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + struct charmap_t *charset, int ignore_content) +{ + struct locale_collate_t *collate; /* Allocate the needed room. */ locale->categories[LC_COLLATE].collate = collate = @@ -196,12 +1192,14 @@ collate_startup (struct linereader *lr, struct localedef_t *locale, /* This tells us no UNDEFINED entry was found until now. */ memset (&collate->undefined, '\0', sizeof (collate->undefined)); - lr->translate_strings = 0; + ldfile->translate_strings = 0; + ldfile->return_widestr = 0; } void -collate_finish (struct localedef_t *locale, struct charset_t *charset) +collate_finish (struct localedef_t *locale, struct charset_t *charset, + struct repertoire_t *repertoire) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; patch_t *patch; @@ -211,7 +1209,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) correctly filled. */ for (patch = collate->all_patches; patch != NULL; patch = patch->next) { - wchar_t wch; + uint32_t wch; size_t toklen = strlen (patch->token); void *ptmp; unsigned int value = 0; @@ -221,7 +1219,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) { element_t *runp; - if (find_entry (&collate->result, &wch, sizeof (wchar_t), + if (find_entry (&collate->result, &wch, sizeof (uint32_t), (void *) &runp) < 0) runp = NULL; for (; runp != NULL; runp = runp->next) @@ -262,9 +1260,9 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) |* XXX We should test whether really an unspecified character *| |* exists before giving the message. 
*| \**************************************************************/ - u_int32_t weight; + uint32_t weight; - if (/* XXX Remove the 0 & */ 0 && !be_quiet) + if (!be_quiet) error (0, 0, _("no definition of `UNDEFINED'")); collate->undefined.ordering_len = collate->nrules; @@ -272,7 +1270,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) for (cnt = 0; cnt < collate->nrules; ++cnt) { - u_int32_t one = 1; + uint32_t one = 1; obstack_grow (&collate->element_mem, &one, sizeof (one)); } @@ -282,7 +1280,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) collate->undefined.ordering = obstack_finish (&collate->element_mem); } - collate->undefined_len = 2; /* For the name: 1 x wchar_t + L'\0'. */ + collate->undefined_len = 2; /* For the name: 1 x uint32_t + L'\0'. */ for (cnt = 0; cnt < collate->nrules; ++cnt) collate->undefined_len += 1 + collate->undefined.ordering[cnt]; } @@ -291,40 +1289,40 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) void collate_output (struct localedef_t *locale, struct charset_t *charset, - const char *output_path) + struct repertoire_t *repertoire, const char *output_path) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - u_int32_t table_size, table_best, level_best, sum_best; + uint32_t table_size, table_best, level_best, sum_best; void *last; element_t *pelem; - wchar_t *name; + uint32_t *name; size_t len; const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE); struct iovec iov[2 + nelems]; struct locale_file data; - u_int32_t idx[nelems]; + uint32_t idx[nelems]; struct obstack non_simple; struct obstack string_pool; size_t cnt, entry_size; - u_int32_t undefined_offset = UINT_MAX; - u_int32_t *table, *extra, *table2, *extra2; + uint32_t undefined_offset = UINT_MAX; + uint32_t *table, *extra, *table2, *extra2; size_t extra_len; - u_int32_t element_hash_tab_size; - u_int32_t *element_hash_tab; - u_int32_t *element_hash_tab_ob; - u_int32_t 
element_string_pool_size; + uint32_t element_hash_tab_size; + uint32_t *element_hash_tab; + uint32_t *element_hash_tab_ob; + uint32_t element_string_pool_size; char *element_string_pool; - u_int32_t element_value_size; - wchar_t *element_value; - wchar_t *element_value_ob; - u_int32_t symbols_hash_tab_size; - u_int32_t *symbols_hash_tab; - u_int32_t *symbols_hash_tab_ob; - u_int32_t symbols_string_pool_size; + uint32_t element_value_size; + uint32_t *element_value; + uint32_t *element_value_ob; + uint32_t symbols_hash_tab_size; + uint32_t *symbols_hash_tab; + uint32_t *symbols_hash_tab_ob; + uint32_t symbols_string_pool_size; char *symbols_string_pool; - u_int32_t symbols_class_size; - u_int32_t *symbols_class; - u_int32_t *symbols_class_ob; + uint32_t symbols_class_size; + uint32_t *symbols_class; + uint32_t *symbols_class_ob; hash_table *hash_tab; unsigned int dummy_weights[collate->nrules + 1]; @@ -382,29 +1380,29 @@ Computing table size for collation information might take a while..."), iov[1].iov_len = sizeof (idx); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u_int32_t); + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (uint32_t); - table = (u_int32_t *) alloca (collate->nrules * sizeof (u_int32_t)); + table = (uint32_t *) alloca (collate->nrules * sizeof (uint32_t)); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len - = collate->nrules * sizeof (u_int32_t); + = collate->nrules * sizeof (uint32_t); /* Another trick here. Describing the collation method needs only a few bits (3, to be exact). But the binary file should be accessible by machines with both endianesses and so we store both forms in the same word. 
*/ for (cnt = 0; cnt < collate->nrules; ++cnt) - table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]); + table[cnt] = collate->rules[cnt] | bswap_32 (collate->rules[cnt]); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u_int32_t); + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len - = sizeof (u_int32_t); + = sizeof (uint32_t); entry_size = 1 + MAX (collate->nrules, 2); - table = (u_int32_t *) alloca (table_best * level_best * entry_size + table = (uint32_t *) alloca (table_best * level_best * entry_size * sizeof (table[0])); memset (table, '\0', table_best * level_best * entry_size * sizeof (table[0])); @@ -413,7 +1411,7 @@ Computing table size for collation information might take a while..."), /* Macros for inserting in output table. */ #define ADD_VALUE(expr) \ do { \ - u_int32_t to_write = (u_int32_t) expr; \ + uint32_t to_write = (uint32_t) expr; \ obstack_grow (&non_simple, &to_write, sizeof (to_write)); \ } while (0) @@ -424,7 +1422,7 @@ Computing table size for collation information might take a while..."), ADD_VALUE (len); \ \ wlen = wcslen (pelem->name); \ - obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \ + obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (uint32_t)); \ \ idx = collate->nrules; \ for (cnt = 0; cnt < collate->nrules; ++cnt) \ @@ -448,14 +1446,14 @@ Computing table size for collation information might take a while..."), table[(level * table_best + slot) * entry_size + 1] \ = FORWARD_CHAR; \ table[(level * table_best + slot) * entry_size + 2] \ - = obstack_object_size (&non_simple) / sizeof (u_int32_t); \ + = obstack_object_size (&non_simple) / sizeof (uint32_t); \ \ /* Here we have to construct the non-simple table entry. 
First \ compute the total length of this entry. */ \ for (runp = (pelem); runp != NULL; runp = runp->next) \ if (runp->ordering != NULL) \ { \ - u_int32_t value; \ + uint32_t value; \ size_t cnt; \ \ value = 1 + wcslen (runp->name) + 1; \ @@ -491,7 +1489,7 @@ Computing table size for collation information might take a while..."), ADD_VALUE (collate->undefined.ordering[cnt]); \ for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \ { \ - if ((wchar_t) collate->undefined.ordering[idx] \ + if ((uint32_t) collate->undefined.ordering[idx] \ == ELLIPSIS_CHAR) \ ADD_VALUE ((pelem)->name[0]); \ else \ @@ -543,14 +1541,15 @@ Computing table size for collation information might take a while..."), { /* We have to fill in the information from the UNDEFINED entry. */ - table[cnt * entry_size] = (u_int32_t) cnt; + table[cnt * entry_size] = (uint32_t) cnt; if (collate->undefined.ordering_len == collate->nrules) { size_t inner; for (inner = 0; inner < collate->nrules; ++inner) - if ((wchar_t)collate->undefined.ordering[collate->nrules + inner] + if ((uint32_t)collate->undefined.ordering[collate->nrules + + inner] == ELLIPSIS_CHAR) table[cnt * entry_size + 1 + inner] = cnt; else @@ -609,8 +1608,6 @@ Computing table size for collation information might take a while..."), size_t idx, cnt; undefined_offset = obstack_object_size (&non_simple); - assert (undefined_offset % sizeof (u_int32_t) == 0); - undefined_offset /= sizeof (u_int32_t); idx = collate->nrules; for (cnt = 0; cnt < collate->nrules; ++cnt) @@ -625,19 +1622,19 @@ Computing table size for collation information might take a while..."), /* Finish the extra block. */ extra_len = obstack_object_size (&non_simple); - extra = (u_int32_t *) obstack_finish (&non_simple); - assert ((extra_len % sizeof (u_int32_t)) == 0); + extra = (uint32_t *) obstack_finish (&non_simple); + assert ((extra_len % sizeof (uint32_t)) == 0); /* Now we have to build the two array for the other byte ordering. 
*/ - table2 = (u_int32_t *) alloca (table_best * level_best * entry_size + table2 = (uint32_t *) alloca (table_best * level_best * entry_size * sizeof (table[0])); - extra2 = (u_int32_t *) alloca (extra_len); + extra2 = (uint32_t *) alloca (extra_len); for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt) - table2[cnt] = SWAPU32 (table[cnt]); + table2[cnt] = bswap_32 (table[cnt]); - for (cnt = 0; cnt < extra_len / sizeof (u_int32_t); ++cnt) - extra2[cnt] = SWAPU32 (extra[cnt]); + for (cnt = 0; cnt < extra_len / sizeof (uint32_t); ++cnt) + extra2[cnt] = bswap_32 (extra2[cnt]); /* We need a simple hashing table to get a collation-element->chars mapping. We again use internal hashing using a secondary hashing @@ -687,9 +1684,9 @@ Computing table size for collation information might take a while..."), element_hash_tab_size = 7; element_hash_tab = obstack_alloc (&non_simple, (2 * element_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); memset (element_hash_tab, '\377', (2 * element_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); ptr = NULL; while (iterate_table (&collate->elements, &ptr, (const void **) &key, @@ -698,7 +1695,7 @@ Computing table size for collation information might take a while..."), size_t hash_val = hash_string (key, keylen); size_t idx = hash_val % element_hash_tab_size; - if (element_hash_tab[2 * idx] != (~((u_int32_t) 0))) + if (element_hash_tab[2 * idx] != (~((uint32_t) 0))) { /* We need the second hashing function. 
*/ size_t c = 1 + (hash_val % (element_hash_tab_size - 2)); @@ -708,16 +1705,16 @@ Computing table size for collation information might take a while..."), idx -= element_hash_tab_size - c; else idx += c; - while (element_hash_tab[2 * idx] != (~((u_int32_t) 0))); + while (element_hash_tab[2 * idx] != (~((uint32_t) 0))); } element_hash_tab[2 * idx] = obstack_object_size (&non_simple); element_hash_tab[2 * idx + 1] = (obstack_object_size (&string_pool) - / sizeof (wchar_t)); + / sizeof (uint32_t)); obstack_grow0 (&non_simple, key, keylen); obstack_grow (&string_pool, data->name, - (wcslen (data->name) + 1) * sizeof (wchar_t)); + (wcslen (data->name) + 1) * sizeof (uint32_t)); } if (obstack_object_size (&non_simple) % 4 != 0) @@ -732,18 +1729,13 @@ Computing table size for collation information might take a while..."), /* Create the tables for the other byte order. */ element_hash_tab_ob = obstack_alloc (&non_simple, (2 * element_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); for (cnt = 0; cnt < 2 * element_hash_tab_size; ++cnt) - element_hash_tab_ob[cnt] = SWAPU32 (element_hash_tab[cnt]); + element_hash_tab_ob[cnt] = bswap_U32 (element_hash_tab[cnt]); element_value_ob = obstack_alloc (&string_pool, element_value_size); - if (sizeof (wchar_t) != 4) - { - fputs ("sizeof (wchar_t) != 4 currently not handled", stderr); - abort (); - } for (cnt = 0; cnt < element_value_size / 4; ++cnt) - element_value_ob[cnt] = SWAPU32 (element_value[cnt]); + element_value_ob[cnt] = bswap_32 (element_value[cnt]); } /* Store collation elements as map to collation class. 
There are @@ -757,9 +1749,9 @@ Computing table size for collation information might take a while..."), + collate->elements.filled + collate->symbols.filled)) / 3); symbols_hash_tab = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); memset (symbols_hash_tab, '\377', (2 * symbols_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); /* Now fill the array. First the symbols from the character set, then the collation elements and last the collation symbols. */ @@ -777,29 +1769,29 @@ Computing table size for collation information might take a while..."), { size_t hash_val; size_t idx; - u_int32_t word; + uint32_t word; unsigned int *weights; if (hash_tab == &charset->char_table || hash_tab == &collate->elements) { element_t *lastp, *firstp; - wchar_t dummy_name[2]; - const wchar_t *name; + uint32_t dummy_name[2]; + const uint32_t *name; size_t name_len; if (hash_tab == &charset->char_table) { - dummy_name[0] = (wchar_t) ((unsigned long int) data); + dummy_name[0] = (uint32_t) ((unsigned long int) data); dummy_name[1] = L'\0'; name = dummy_name; - name_len = sizeof (wchar_t); + name_len = sizeof (uint32_t); } else { element_t *elemp = (element_t *) data; name = elemp->name; - name_len = wcslen (name) * sizeof (wchar_t); + name_len = wcslen (name) * sizeof (uint32_t); } /* First check whether this character is used at all. 
*/ @@ -815,8 +1807,6 @@ Computing table size for collation information might take a while..."), lastp = firstp; while (lastp->next != NULL && wcscmp (name, lastp->name)) lastp = lastp->next; - if (lastp->ordering == NULL) - lastp = &collate->undefined; } weights = lastp->ordering; @@ -835,7 +1825,7 @@ Computing table size for collation information might take a while..."), hash_val = hash_string (key, keylen); idx = hash_val % symbols_hash_tab_size; - if (symbols_hash_tab[2 * idx] != (~((u_int32_t) 0))) + if (symbols_hash_tab[2 * idx] != (~((uint32_t) 0))) { /* We need the second hashing function. */ size_t c = 1 + (hash_val % (symbols_hash_tab_size - 2)); @@ -845,23 +1835,23 @@ Computing table size for collation information might take a while..."), idx -= symbols_hash_tab_size - c; else idx += c; - while (symbols_hash_tab[2 * idx] != (~((u_int32_t) 0))); + while (symbols_hash_tab[2 * idx] != (~((uint32_t) 0))); } symbols_hash_tab[2 * idx] = obstack_object_size (&string_pool); symbols_hash_tab[2 * idx + 1] = (obstack_object_size (&non_simple) - / sizeof (u_int32_t)); + / sizeof (uint32_t)); obstack_grow0 (&string_pool, key, keylen); /* Adding the first weight looks complicated. We have to deal with the kind it is stored and with the fact that original - form uses `unsigned int's while we need `u_int32_t' here. */ + form uses `unsigned int's while we need `uint32_t' here. */ word = weights[0]; - obstack_grow (&non_simple, &word, sizeof (u_int32_t)); + obstack_grow (&non_simple, &word, sizeof (uint32_t)); for (cnt = 0; cnt < weights[0]; ++cnt) { word = weights[collate->nrules + cnt]; - obstack_grow (&non_simple, &word, sizeof (u_int32_t)); + obstack_grow (&non_simple, &word, sizeof (uint32_t)); } } @@ -884,13 +1874,13 @@ Computing table size for collation information might take a while..."), /* Generate tables with other byte order. 
*/ symbols_hash_tab_ob = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size - * sizeof (u_int32_t))); + * sizeof (uint32_t))); for (cnt = 0; cnt < 2 * symbols_hash_tab_size; ++cnt) - symbols_hash_tab_ob[cnt] = SWAPU32 (symbols_hash_tab[cnt]); + symbols_hash_tab_ob[cnt] = bswap_32 (symbols_hash_tab[cnt]); symbols_class_ob = obstack_alloc (&non_simple, symbols_class_size); for (cnt = 0; cnt < symbols_class_size / 4; ++cnt) - symbols_class_ob[cnt] = SWAPU32 (symbols_class[cnt]); + symbols_class_ob[cnt] = bswap_32 (symbols_class[cnt]); /* Store table addresses and lengths. */ @@ -925,34 +1915,34 @@ Computing table size for collation information might take a while..."), #endif iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u_int32_t); + iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_base = &element_hash_tab_size; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_len - = sizeof (u_int32_t); + = sizeof (uint32_t); #if __BYTE_ORDER == __BIG_ENDIAN iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base = element_hash_tab; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len - = 2 * element_hash_tab_size * sizeof (u_int32_t); + = 2 * element_hash_tab_size * sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base = element_hash_tab_ob; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len - = 2 * element_hash_tab_size * sizeof (u_int32_t); + = 2 * element_hash_tab_size * sizeof (uint32_t); #else iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base = element_hash_tab; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len - = 2 * element_hash_tab_size * sizeof (u_int32_t); + = 2 * element_hash_tab_size * sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base = element_hash_tab_ob; iov[2 + _NL_ITEM_INDEX 
(_NL_COLLATE_ELEM_HASH_EB)].iov_len - = 2 * element_hash_tab_size * sizeof (u_int32_t); + = 2 * element_hash_tab_size * sizeof (uint32_t); #endif iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_base @@ -985,28 +1975,28 @@ Computing table size for collation information might take a while..."), iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_base = &symbols_hash_tab_size; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_len - = sizeof (u_int32_t); + = sizeof (uint32_t); #if __BYTE_ORDER == __BIG_ENDIAN iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base = symbols_hash_tab; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len - = 2 * symbols_hash_tab_size * sizeof (u_int32_t); + = 2 * symbols_hash_tab_size * sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base = symbols_hash_tab_ob; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len - = 2 * symbols_hash_tab_size * sizeof (u_int32_t); + = 2 * symbols_hash_tab_size * sizeof (uint32_t); #else iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base = symbols_hash_tab; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len - = 2 * symbols_hash_tab_size * sizeof (u_int32_t); + = 2 * symbols_hash_tab_size * sizeof (uint32_t); iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base = symbols_hash_tab_ob; iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len - = 2 * symbols_hash_tab_size * sizeof (u_int32_t); + = 2 * symbols_hash_tab_size * sizeof (uint32_t); #endif iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_base @@ -1048,58 +2038,64 @@ Computing table size for collation information might take a while..."), } -void -collate_element_to (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +static int +collate_element_to (struct linereader *ldfile, + struct locale_collate_t *collate, + struct token *code, struct charmap_t *charmap, + struct repertoire_t *repertoire) { - struct 
locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - unsigned int value; + struct charseq *seq; + uint32_t value; void *not_used; - if (collate->combine_token != NULL) + seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len); + if (seq != NULL) { - free ((void *) collate->combine_token); - collate->combine_token = NULL; + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in charmap"), + (int) code->val.str.len, code->val.str.start); + return 1; } - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); - if ((wchar_t) value != ILLEGAL_CHAR_VALUE) + value = repertoire_find_value (repertoire, code->val.str.start, + code->val.str.len); + if (value != ILLEGAL_CHAR_VALUE) { - lr_error (lr, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbolic name in charset"), + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in repertoire"), (int) code->val.str.len, code->val.str.start); - return; + return 1; } if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, &not_used) >= 0) { - lr_error (lr, _("symbol for multicharacter collating element " - "`%.*s' duplicates element definition"), + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates other element definition"), (int) code->val.str.len, code->val.str.start); - return; + return 1; } if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, &not_used) >= 0) { - lr_error (lr, _("symbol for multicharacter collating element " + lr_error (ldfile, _("symbol for multicharacter collating element " "`%.*s' duplicates symbol definition"), (int) code->val.str.len, code->val.str.start); - return; + return 1; } - collate->combine_token = code->val.str.start; - collate->combine_token_len = code->val.str.len; + return 0; } -void -collate_element_from (struct linereader
*lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +static void +collate_element_from (struct linereader *ldfile, + struct locale_collate_t *collate, + const char *to_str, struct token *code, + struct charmap_t *charmap, + struct repertoire_t *repertoire) { - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; element_t *elemp, *runp; /* CODE is a string. */ @@ -1108,33 +2104,26 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale, /* We have to translate the string. It may contain <...> character names. */ - elemp->name = (wchar_t *) translate_string (code->val.str.start, charset); + elemp->namemb = code->val.str.startmb; + elemp->namewc = code->val.str.startwc; elemp->this_weight = 0; elemp->ordering = NULL; elemp->ordering_len = 0; - free (code->val.str.start); - - if (elemp->name == NULL) + if (elemp->namemb == NULL && elemp->namewc == NULL) { - /* At least one character in the string is not defined. We simply - do nothing. */ + /* The string contains characters which are not in the charmap nor + in the repertoire. Ignore the string. */ if (verbose) - lr_error (lr, _("\ + lr_error (ldfile, _("\ `from' string in collation element declaration contains unknown character")); return; } - if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0') - { - lr_error (lr, _("illegal collation element")); - return; - } - /* The entries in the linked lists of RESULT are sorting in descending order. The order is important for the `strcoll' and `wcscoll' functions. */ - if (find_entry (&collate->result, elemp->name, sizeof (wchar_t), + if (find_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t), (void *) &runp) >= 0) { /* We already have an entry with this key. 
Check whether it is @@ -1144,7 +2133,49 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale, do { - cmpres = wcscmp (elemp->name, runp->name); + cmpres = wcscmp (elemp->namewc, runp->namewc); + if (cmpres <= 0) + break; + prevp = runp; + } + while ((runp = runp->next) != NULL); + + if (cmpres == 0) + lr_error (ldfile, _("\ +duplicate collating element definition (repertoire)")); + else + { + elemp->next = runp; + if (prevp == NULL) + { + if (set_entry (&collate->resultwc, elemp->namewc, + sizeof (uint32_t), elemp) < 0) + error (EXIT_FAILURE, 0, _("\ +error while inserting collation element into hash table")); + } + else + prevp->next = elemp; + } + } + else + { + elemp->next = NULL; + if (insert_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t), + elemp) < 0) + error (EXIT_FAILURE, errno, _("error while inserting to hash table")); + } + + /* Now also insert the element definition in the multibyte table. */ + if (find_entry (&collate->resultmb, elemp->namemb, 1, (void *) &runp) >= 0) + { + /* We already have an entry with this key. Check whether it is + identical. 
*/ + element_t *prevp = NULL; + int cmpres; + + do + { + cmpres = strcmp (elemp->namemb, runp->namemb); if (cmpres <= 0) break; prevp = runp; @@ -1152,14 +2183,14 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale, while ((runp = runp->next) != NULL); if (cmpres == 0) - lr_error (lr, _("duplicate collating element definition")); + lr_error (ldfile, _("\ +duplicate collating element definition (charmap)")); else { elemp->next = runp; if (prevp == NULL) { - if (set_entry (&collate->result, elemp->name, sizeof (wchar_t), - elemp) < 0) + if (set_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0) error (EXIT_FAILURE, 0, _("\ error while inserting collation element into hash table")); } @@ -1170,32 +2201,41 @@ error while inserting collation element into hash table")); else { elemp->next = NULL; - if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp) - < 0) + if (insert_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0) error (EXIT_FAILURE, errno, _("error while inserting to hash table")); } - if (insert_entry (&collate->elements, collate->combine_token, - collate->combine_token_len, (void *) elemp) < 0) - lr_error (lr, _("cannot insert new collating symbol definition: %s"), + /* Finally install the mapping from the `to'-name to the `from'-name. 
*/ + if (insert_entry (&collate->elements, to_str, strlen (to_str), + (void *) elemp) < 0) + lr_error (ldfile, _("cannot insert new collating symbol definition: %s"), strerror (errno)); } -void -collate_symbol (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +static void +collate_symbol (struct linereader *ldfile, struct locale_collate_t *collate, + struct token *code, struct charmap_t *charmap, + struct repertoire_t *repertoire) { - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - wchar_t value; + uint32_t value; + struct charseq *seq; void *not_used; - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + seq = charset_find_value (charmap, code->val.str.start, code->val.str.len); + if (seq != NULL) + { + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in charmap"), + (int) code->val.str.len, code->val.str.start); + return; + } + + value = repertoire (repertoire, code->val.str.start, code->val.str.len); if (value != ILLEGAL_CHAR_VALUE) { - lr_error (lr, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbolic name in charset"), + lr_error (ldfile, _("symbol for multicharacter collating element " + "`%.*s' duplicates symbolic name in repertoire"), (int) code->val.str.len, code->val.str.start); return; } @@ -1203,7 +2243,7 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale, if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, &not_used) >= 0) { - lr_error (lr, _("symbol for multicharacter collating element " + lr_error (ldfile, _("symbol for multicharacter collating element " "`%.*s' duplicates element definition"), (int) code->val.str.len, code->val.str.start); return; @@ -1212,7 +2252,7 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale, if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
&not_used) >= 0) { - lr_error (lr, _("symbol for multicharacter collating element " + lr_error (ldfile, _("symbol for multicharacter collating element " "`%.*s' duplicates other symbol definition"), (int) code->val.str.len, code->val.str.start); return; @@ -1220,13 +2260,13 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale, if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len, (void *) 0) < 0) - lr_error (lr, _("cannot insert new collating symbol definition: %s"), + lr_error (ldfile, _("cannot insert new collating symbol definition: %s"), strerror (errno)); } void -collate_new_order (struct linereader *lr, struct localedef_t *locale, +collate_new_order (struct linereader *ldfile, struct localedef_t *locale, enum coll_sort_rule sort_rule) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; @@ -1245,7 +2285,7 @@ collate_new_order (struct linereader *lr, struct localedef_t *locale, void -collate_build_arrays (struct linereader *lr, struct localedef_t *locale) +collate_build_arrays (struct linereader *ldfile, struct localedef_t *locale) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; @@ -1264,13 +2304,13 @@ collate_build_arrays (struct linereader *lr, struct localedef_t *locale) int -collate_order_elem (struct linereader *lr, struct localedef_t *locale, +collate_order_elem (struct linereader *ldfile, struct localedef_t *locale, struct token *code, struct charset_t *charset) { - const wchar_t zero = L'\0'; + const uint32_t zero = L'\0'; struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; int result = 0; - wchar_t value; + uint32_t value; void *tmp; unsigned int i; @@ -1286,7 +2326,7 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, collate->kind = character; - if (find_entry (&collate->result, &value, sizeof (wchar_t), + if (find_entry (&collate->result, &value, sizeof (uint32_t), (void *) &firstp) < 0) firstp = lastp = NULL; else
@@ -1299,9 +2339,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, if (lastp->name[0] == value && lastp->name[1] == L'\0') { - lr_error (lr, _("duplicate definition for character `%.*s'"), + lr_error (ldfile, + _("duplicate definition for character `%.*s'"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; break; } @@ -1315,7 +2356,7 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, obstack_grow (&collate->element_mem, &zero, sizeof (zero)); collate->current_element->name = - (const wchar_t *) obstack_finish (&collate->element_mem); + (const uint32_t *) obstack_finish (&collate->element_mem); collate->current_element->this_weight = ++collate->order_cnt; @@ -1323,10 +2364,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, if (firstp == NULL) { - if (insert_entry (&collate->result, &value, sizeof (wchar_t), + if (insert_entry (&collate->result, &value, sizeof (uint32_t), (void *) collate->current_element) < 0) { - lr_error (lr, _("cannot insert collation element `%.*s'"), + lr_error (ldfile, _("cannot insert collation element `%.*s'"), (int) code->val.str.len, code->val.str.start); exit (4); } @@ -1341,10 +2382,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale, if (collate->current_element->this_weight != 0) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ collation element `%.*s' appears more than once: ignore line"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; break; } @@ -1359,10 +2400,10 @@ collation element `%.*s' appears more than once: ignore line"), if ((unsigned long int) tmp != 0ul) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ collation symbol `%.*s' appears more than once: ignore line"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; break; } @@ 
-1372,16 +2413,16 @@ collation symbol `%.*s' appears more than once: ignore line"), if (set_entry (&collate->symbols, code->val.str.start, code->val.str.len, (void *) order) < 0) { - lr_error (lr, _("cannot process order specification")); + lr_error (ldfile, _("cannot process order specification")); exit (4); } } else { if (verbose) - lr_error (lr, _("unknown symbol `%.*s': line ignored"), + lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; } @@ -1395,7 +2436,7 @@ collation symbol `%.*s' appears more than once: ignore line"), case tok_ellipsis: if (collate->was_ellipsis) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ two lines in a row containing `...' are not allowed")); result = -1; } @@ -1403,9 +2444,9 @@ two lines in a row containing `...' are not allowed")); { /* An ellipsis requires the previous line to be an character definition. */ - lr_error (lr, _("\ + lr_error (ldfile, _("\ line before ellipsis does not contain definition for character constant")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; } else @@ -1424,21 +2465,21 @@ line before ellipsis does not contain definition for character constant")); { if (collate->kind != character) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ line after ellipsis must contain character definition")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); result = -1; } else if (collate->last_char > value) { - lr_error (lr, _("end point of ellipsis range is bigger then start")); - lr_ignore_rest (lr, 0); + lr_error (ldfile, _("end point of ellipsis range is bigger then start")); + lr_ignore_rest (ldfile, 0); result = -1; } else { /* We can fill the arrays with the information we need. 
*/ - wchar_t name[2]; + uint32_t name[2]; unsigned int *data; size_t *ptr; size_t cnt; @@ -1450,9 +2491,6 @@ line after ellipsis must contain character definition")); * sizeof (unsigned int)); ptr = (size_t *) alloca (collate->nrules * sizeof (size_t)); - if (data == NULL || ptr == NULL) - error (4, 0, _("memory exhausted")); - /* Prepare data. Because the characters covered by an ellipsis all have equal values we prepare the data once and only change the variable number (if there are any). @@ -1470,7 +2508,7 @@ line after ellipsis must contain character definition")); data[collate->nrules + cnt] = collate->weight[cnt]; for (cnt = 0; cnt < collate->nrules; ++cnt) - if ((wchar_t) data[ptr[cnt]] != ELLIPSIS_CHAR) + if ((uint32_t) data[ptr[cnt]] != ELLIPSIS_CHAR) ptr[cnt] = 0; while (name[0] <= value) @@ -1479,12 +2517,9 @@ line after ellipsis must contain character definition")); pelem = (element_t *) obstack_alloc (&collate->element_mem, sizeof (element_t)); - if (pelem == NULL) - error (4, 0, _("memory exhausted")); - pelem->name - = (const wchar_t *) obstack_copy (&collate->element_mem, - name, 2 * sizeof (wchar_t)); + = (const uint32_t *) obstack_copy (&collate->element_mem, + name, 2 * sizeof (uint32_t)); pelem->this_weight = ++collate->order_cnt; pelem->ordering_len = collate->nweight; @@ -1500,17 +2535,17 @@ line after ellipsis must contain character definition")); pelem->ordering[ptr[cnt]] = pelem->this_weight; /* Insert new entry into result table. 
*/ - if (find_entry (&collate->result, name, sizeof (wchar_t), + if (find_entry (&collate->result, name, sizeof (uint32_t), (void *) &pelem->next) >= 0) { - if (set_entry (&collate->result, name, sizeof (wchar_t), + if (set_entry (&collate->result, name, sizeof (uint32_t), (void *) pelem) < 0) error (4, 0, _("cannot insert into result table")); } else { pelem->next = NULL; - if (insert_entry (&collate->result, name, sizeof (wchar_t), + if (insert_entry (&collate->result, name, sizeof (uint32_t), (void *) pelem) < 0) error (4, 0, _("cannot insert into result table")); } @@ -1533,12 +2568,12 @@ line after ellipsis must contain character definition")); int -collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, +collate_weight_bsymbol (struct linereader *ldfile, struct localedef_t *locale, struct token *code, struct charset_t *charset) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; unsigned int here_weight; - wchar_t value; + uint32_t value; void *tmp; assert (code->tok == tok_bsymbol); @@ -1549,7 +2584,7 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, { element_t *runp; - if (find_entry (&collate->result, &value, sizeof (wchar_t), + if (find_entry (&collate->result, &value, sizeof (uint32_t), (void *)&runp) < 0) runp = NULL; @@ -1574,9 +2609,9 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, else { if (verbose) - lr_error (lr, _("unknown symbol `%.*s': line ignored"), + lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } @@ -1584,9 +2619,9 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, weight. 
*/ if (collate->kind == symbol) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ specification of sorting weight for collation symbol does not make sense")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } @@ -1606,8 +2641,8 @@ specification of sorting weight for collation symbol does not make sense")); newp = (patch_t *) obstack_alloc (&collate->element_mem, sizeof (patch_t)); - newp->fname = lr->fname; - newp->lineno = lr->lineno; + newp->fname = ldfile->fname; + newp->lineno = ldfile->lineno; newp->token = (const char *) obstack_copy0 (&collate->element_mem, code->val.str.start, code->val.str.len); @@ -1624,23 +2659,23 @@ specification of sorting weight for collation symbol does not make sense")); int -collate_next_weight (struct linereader *lr, struct localedef_t *locale) +collate_next_weight (struct linereader *ldfile, struct localedef_t *locale) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; if (collate->kind == symbol) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ specification of sorting weight for collation symbol does not make sense")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } ++collate->weight_idx; if (collate->weight_idx >= collate->nrules) { - lr_error (lr, _("too many weights")); - lr_ignore_rest (lr, 0); + lr_error (ldfile, _("too many weights")); + lr_ignore_rest (ldfile, 0); return -1; } @@ -1649,7 +2684,7 @@ specification of sorting weight for collation symbol does not make sense")); int -collate_simple_weight (struct linereader *lr, struct localedef_t *locale, +collate_simple_weight (struct linereader *ldfile, struct localedef_t *locale, struct token *code, struct charset_t *charset) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; @@ -1668,9 +2703,9 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, entry. 
*/ if (collate->kind != ellipsis && collate->kind != undefined) { - lr_error (lr, _("\ + lr_error (ldfile, _("\ `...' must only be used in `...' and `UNDEFINED' entries")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } value = ELLIPSIS_CHAR; @@ -1691,18 +2726,18 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, { char *startp = (char *) runp; char *putp = (char *) runp; - wchar_t wch; + uint32_t wch; /* Lookup weight for char and store it. */ if (*runp == '<') { while (*++runp != '\0' && *runp != '>') { - if (*runp == lr->escape_char) + if (*runp == ldfile->escape_char) if (*++runp == '\0') { - lr_error (lr, _("unterminated weight name")); - lr_ignore_rest (lr, 0); + lr_error (ldfile, _("unterminated weight name")); + lr_ignore_rest (ldfile, 0); return -1; } *putp++ = *runp; @@ -1712,8 +2747,8 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, if (putp == startp) { - lr_error (lr, _("empty weight name: line ignored")); - lr_ignore_rest (lr, 0); + lr_error (ldfile, _("empty weight name: line ignored")); + lr_ignore_rest (ldfile, 0); return -1; } @@ -1723,7 +2758,7 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, { element_t *pelem; - if (find_entry (&collate->result, &wch, sizeof (wchar_t), + if (find_entry (&collate->result, &wch, sizeof (uint32_t), (void *)&pelem) < 0) pelem = NULL; @@ -1749,30 +2784,30 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, else { if (verbose) - lr_error (lr, _("unknown symbol `%.*s': line ignored"), + lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), (int) (putp - startp), startp); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } } else { element_t *wp; - wchar_t wch; + uint32_t wch; - if (*runp == lr->escape_char) + if (*runp == ldfile->escape_char) { static const char digits[] = "0123456789abcdef"; const char *dp; int base; ++runp; - if (_tolower (*runp) == 'x') + if 
(tolower (*runp) == 'x') { ++runp; base = 16; } - else if (_tolower (*runp) == 'd') + else if (tolower (*runp) == 'd') { ++runp; base = 10; @@ -1780,19 +2815,19 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, else base = 8; - dp = strchr (digits, _tolower (*runp)); + dp = strchr (digits, tolower (*runp)); if (dp == NULL || (dp - digits) >= base) { illegal_char: - lr_error (lr, _("\ + lr_error (ldfile, _("\ illegal character constant in string")); - lr_ignore_rest (lr, 0); + lr_ignore_rest (ldfile, 0); return -1; } wch = dp - digits; ++runp; - dp = strchr (digits, _tolower (*runp)); + dp = strchr (digits, tolower (*runp)); if (dp == NULL || (dp - digits) >= base) goto illegal_char; wch *= base; @@ -1801,7 +2836,7 @@ illegal character constant in string")); if (base != 16) { - dp = strchr (digits, _tolower (*runp)); + dp = strchr (digits, tolower (*runp)); if (dp != NULL && (dp - digits < base)) { wch *= base; @@ -1811,7 +2846,7 @@ illegal character constant in string")); } } else - wch = (wchar_t) *runp++; + wch = (uint32_t) *runp++; /* Lookup the weight for WCH. 
*/ if (find_entry (&collate->result, &wch, sizeof (wch), @@ -1849,8 +2884,8 @@ illegal character constant in string")); newp = (patch_t *) obstack_alloc (&collate->element_mem, sizeof (patch_t)); - newp->fname = lr->fname; - newp->lineno = lr->lineno; + newp->fname = ldfile->fname; + newp->lineno = ldfile->lineno; newp->token = (const char *) obstack_copy0 (&collate->element_mem, startp, putp - startp); @@ -1885,7 +2920,7 @@ illegal character constant in string")); void -collate_end_weight (struct linereader *lr, struct localedef_t *locale) +collate_end_weight (struct linereader *ldfile, struct localedef_t *locale) { struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; element_t *pelem = collate->current_element; @@ -1951,3 +2986,239 @@ collate_end_weight (struct linereader *lr, struct localedef_t *locale) if (collate->kind != undefined) collate->last_char = pelem->name[0]; } + + +/* The parser for the LC_CTYPE section of the locale definition. */ +void +read_lc_collate (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, struct repertoire_t *repertoire, + int ignore_content) +{ + struct locale_collate_t *collate; + int did_copy = 0; + const char *save_str; + + /* The rest of the line containing `LC_COLLATE' must be free. */ + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, tok_lc_collate, LC_COLLATE, "LC_COLLATE", + ignore_content); + did_copy = 1; + } + + /* Prepare the data structures. */ + collate_startup (ldfile, result, charmap, ignore_content); + collate = result->categories[LC_COLLATE].collate; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_coll_weight_max: + if (did_copy) + goto err_label; + /* The rest of the line must be a single integer value. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_number) + goto err_label; + /* We simply forget about the value we just read, the implementation + has no fixed limits. */ + lr_ignore_rest (ldfile, 1); + break; + + case tok_script: + if (did_copy) + goto err_label; + /* We expect the name of the script in brackets. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_bsymbol && now->tok != tok_ucs4) + goto err_label; + if (now->tok != tok_bsymbol) + { + lr_error (ldfile, _("\ +script name `%s' must not duplicate any known name"), + tok->val.str.startmb); + lr_ignore_rest (ldfile, 0); + break; + } + collate->scripts = xmalloc (collate->scripts, + (collate->nscripts + * sizeof (const char *))); + collate->scripts[collate->nscripts++] = tok->val.str.startmb; + lr_ignore_rest (ldfile, 1); + break; + + case tok_collating_element: + if (did_copy) + goto err_label; + /* Get the first argument, a symbol in brackets. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_bsymbol) + goto err_label; + /* Test it. */ + if (collate_element_to (ldfile, collate, now, charmap, repertoire)) + { + /* An error occurred. */ + lr_ignore_rest (ldfile, 0); + break; + } + save_str = tok->val.str.startmb; + /* Next comes `from'. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_from) + goto err_label; + /* Now comes a string. */ + now = lr_token (ldfile, charmap, repertoire); + if (now->tok != tok_string) + goto err_label; + collate_element_from (ldfile, collate, save_str, now, charmap, + repertoire); + /* The rest of the line should be empty. 
*/ + lr_ignore_rest (ldfile, 1); + break; + + case tok_collating_symbol: + if (did_copy) + goto err_label; + /* Get the argument, a single symbol in brackets. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_bsymbol) + goto err_label; + collate_symbol (ldfile, collate, now, charmap, repertoire); + break; + + case tok_order_start: + if (did_copy) + goto err_label; + + /* We expect now a scripting symbol or start right away + with the order keywords. Or we have no argument at all + in which means `forward'. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eol) + { + static enum coll_sort_rule default_rule = sort_forward; + /* Use a single `forward' rule. */ + collate->nrules = 1; + collate->rules = &default_rule; + } + else + { + /* XXX We don't recognize the ISO 14651 extensions yet. */ + uint32_t nrules = 0; + uint32_t nrules_max = 32; + enum coll_sort_rule *rules = alloca (nrules_max + * sizeof (*rules)); + int saw_semicolon = 0; + + memset (rules, '\0', nrules_max * sizeof (*rules)); + do + { + if (now->tok != tok_forward && now->tok != tok_backward + && now->tok != tok_position) + goto err_label; + + if (saw_semicolon) + { + if (nrules == nrules_max) + { + newp = alloca (nrules_max * 2 * sizeof (*rules)); + rules = memcpy (newp, rules, + nrules_max * sizeof (*rules)); + memset (&rules[nrules_max], '\0', + nrules_max * sizeof (*rules)); + nrules_max *= 2; + } + ++nrules; + } + + switch (now->tok) + { + case tok_forward: + if ((rules[nrules] & sort_backward) != 0) + { + lr_error (ldfile, _("\ +`forward' and `backward' order exclude each other")); + lr_ignore_rest (ldfile, 0); + goto error_sort; + } + rules[nrules] |= sort_forward; + break; + case tok_backward: + if ((rules[nrules] & sort_forward) != 0) + { + lr_error (ldfile, _("\ +`forward' and `backward' order exclude each other")); + lr_ignore_rest (ldfile, 0); + goto error_sort; + } + rules[nrules] |= sort_backward; + break; + case tok_position: + rules[nrules] |= 
tok_position; + break; + } + + /* Get the next token. This is either the end of the line, + a comma or a semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_comma || now->tok == tok_semicolon) + { + saw_semicolon = now->tok == tok_semicolon; + now = lr_token (ldfile, charmap, NULL); + } + } + while (now->tok != tok_eol || now->tok != tok_eof); + + error_sort: + collate->nrules = nrules; + collate->rules = memcpy (xmalloc (nrules * sizeof (*rules)), + rules, nrules * sizeof (*rules)); + } + + /* Now read the rules. */ + read_rules (ldfile, collate, charmap, repertoire); + break; + + case tok_reorder_after: + break; + + case tok_reorder_script_after: + break; + + default: + err_label: + if (now->tok != tok_eof) + SYNTAX_ERROR (_("syntax error in %s locale definition"), + "LC_COLLATE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("premature end of file while reading category `%s'"), + "LC_COLLATE"); +} + +#endif diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index 714a718..6743c18 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -1,6 +1,6 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -22,183 +22,274 @@ #endif #include <alloca.h> +#include <byteswap.h> #include <endian.h> +#include <errno.h> #include <limits.h> +#include <obstack.h> +#include <stdlib.h> #include <string.h> -#include <libintl.h> +#include <wchar.h> +#include <wctype.h> +#include <sys/uio.h> -#include "locales.h" +#include "charmap.h" #include "localeinfo.h" #include "langinfo.h" +#include "linereader.h" #include "locfile-token.h" -#include "stringtrans.h" +#include "locfile.h" +#include "localedef.h" -/* Uncomment the following line in the production version. */ -/* define NDEBUG 1 */ #include <assert.h> -void *xmalloc (size_t __n); -void *xcalloc (size_t __n, size_t __s); -void *xrealloc (void *__ptr, size_t __n); +/* These are the extra bits not in wctype.h since these are not preallocated + classes. */ +#define _ISwspecial1 (1 << 29) +#define _ISwspecial2 (1 << 30) +#define _ISwspecial3 (1 << 31) /* The bit used for representing a special class. */ #define BITPOS(class) ((class) - tok_upper) -#define BIT(class) (1 << BITPOS (class)) +#define BIT(class) (_ISbit (BITPOS (class))) +#define BITw(class) (_ISwbit (BITPOS (class))) #define ELEM(ctype, collection, idx, value) \ *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \ &ctype->collection##_act idx, value) -#define SWAPU32(w) \ - (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24)) - -#define SWAPU16(w) \ - ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8)) - /* To be compatible with former implementations we for now restrict the number of bits for character classes to 16. When compatibility is not necessary anymore increase the number to 32. 
*/ -#define char_class_t u_int16_t -#define CHAR_CLASS_TRANS SWAPU16 -#define char_class32_t u_int32_t -#define CHAR_CLASS32_TRANS SWAPU32 +#define char_class_t uint16_t +#define CHAR_CLASS_TRANS bswap_16 +#define char_class32_t uint32_t +#define CHAR_CLASS32_TRANS bswap_32 + + +/* Type to describe a transliteration action. We have a possibly + multiple character from-string and a set of multiple character + to-strings. All are 32bit values since this is what is used in + the gconv functions. */ +struct translit_to_t +{ + uint32_t *str; + + struct translit_to_t *next; +}; + +struct translit_t +{ + uint32_t *from; + + struct translit_to_t *to; + + struct translit_t *next; +}; /* The real definition of the struct for the LC_CTYPE locale. */ struct locale_ctype_t { - unsigned int *charnames; + uint32_t *charnames; size_t charnames_max; size_t charnames_act; - /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */ -#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1) + struct repertoire_t *repertoire; + + /* We will allow up to 8 * sizeof (uint32_t) character classes. */ +#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t)) size_t nr_charclass; const char *classnames[MAX_NR_CHARCLASS]; - unsigned long int current_class_mask; - unsigned int last_class_char; - u_int32_t *class_collection; + uint32_t last_class_char; + uint32_t class256_collection[256]; + uint32_t *class_collection; size_t class_collection_max; size_t class_collection_act; - unsigned long int class_done; + uint32_t class_done; + + struct charseq **mbdigits; + size_t mbdigits_act; + size_t mbdigits_max; + uint32_t *wcdigits; + size_t wcdigits_act; + size_t wcdigits_max; + + struct charseq *mboutdigits[10]; + uint32_t wcoutdigits[10]; + size_t outdigits_act; /* If the following number ever turns out to be too small simply increase it. But I doubt it will. 
--drepper@gnu */ #define MAX_NR_CHARMAP 16 const char *mapnames[MAX_NR_CHARMAP]; - u_int32_t *map_collection[MAX_NR_CHARMAP]; + uint32_t *map_collection[MAX_NR_CHARMAP]; + uint32_t map256_collection[2][256]; size_t map_collection_max[MAX_NR_CHARMAP]; size_t map_collection_act[MAX_NR_CHARMAP]; size_t map_collection_nr; size_t last_map_idx; - unsigned int from_map_char; - int toupper_done; - int tolower_done; + int tomap_done[MAX_NR_CHARMAP]; + + /* Transliteration information. */ + const char *translit_copy_locale; + const char *translit_copy_repertoire; + struct translit_t *translit; /* The arrays for the binary representation. */ - u_int32_t plane_size; - u_int32_t plane_cnt; + uint32_t plane_size; + uint32_t plane_cnt; char_class_t *ctype_b; char_class32_t *ctype32_b; - u_int32_t *names_el; - u_int32_t *names_eb; - u_int32_t **map_eb; - u_int32_t **map_el; - u_int32_t *class_name_ptr; - u_int32_t *map_name_ptr; + uint32_t *names_el; + uint32_t *names_eb; + uint32_t **map_eb; + uint32_t **map_el; + uint32_t *class_name_ptr; + uint32_t *map_name_ptr; unsigned char *width; - u_int32_t mb_cur_max; + uint32_t mb_cur_max; const char *codeset_name; + uint32_t translit_hash_size_eb; + uint32_t translit_hash_size_el; + uint32_t translit_hash_layers_eb; + uint32_t translit_hash_layers_el; + uint32_t *translit_from_idx_eb; + uint32_t *translit_from_idx_el; + uint32_t *translit_from_tbl_eb; + uint32_t *translit_from_tbl_el; + uint32_t *translit_to_idx_eb; + uint32_t *translit_to_idx_el; + uint32_t *translit_to_tbl_eb; + uint32_t *translit_to_tbl_el; + size_t translit_idx_size; + size_t translit_from_tbl_size; + size_t translit_to_tbl_size; + + struct obstack mem_pool; }; +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free + + /* Prototypes for local functions. 
*/ -static void ctype_class_newP (struct linereader *lr, - struct locale_ctype_t *ctype, const char *name); -static void ctype_map_newP (struct linereader *lr, - struct locale_ctype_t *ctype, - const char *name, struct charset_t *charset); -static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table, - size_t *max, size_t *act, unsigned int idx); +static void ctype_startup (struct linereader *lr, struct localedef_t *locale, + struct charmap_t *charmap, int ignore_content); +static void ctype_class_new (struct linereader *lr, + struct locale_ctype_t *ctype, const char *name); +static void ctype_map_new (struct linereader *lr, + struct locale_ctype_t *ctype, + const char *name, struct charmap_t *charmap); +static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table, + size_t *max, size_t *act, unsigned int idx); static void set_class_defaults (struct locale_ctype_t *ctype, - struct charset_t *charset); + struct charmap_t *charmap, + struct repertoire_t *repertoire); static void allocate_arrays (struct locale_ctype_t *ctype, - struct charset_t *charset); + struct charmap_t *charmap, + struct repertoire_t *repertoire); -void +static const char *longnames[] = +{ + "zero", "one", "two", "three", "four", + "five", "six", "seven", "eight", "nine" +}; +static const unsigned char digits[] = "0123456789"; + + +static void ctype_startup (struct linereader *lr, struct localedef_t *locale, - struct charset_t *charset) + struct charmap_t *charmap, int ignore_content) { unsigned int cnt; struct locale_ctype_t *ctype; - /* We have a definition for LC_CTYPE. */ - copy_posix.mask &= ~(1 << LC_CTYPE); - - /* It is important that we always use UCS1 encoding for strings now. */ - encoding_method = ENC_UCS1; - - /* Allocate the needed room. */ - locale->categories[LC_CTYPE].ctype = ctype = - (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t)); - - /* We have no names seen yet. */ - ctype->charnames_max = charset->mb_cur_max == 1 ? 
256 : 512; - ctype->charnames = - (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int)); - for (cnt = 0; cnt < 256; ++cnt) - ctype->charnames[cnt] = cnt; - ctype->charnames_act = 256; - - /* Fill character class information. */ - ctype->nr_charclass = 0; - ctype->current_class_mask = 0; - ctype->last_class_char = ILLEGAL_CHAR_VALUE; - /* The order of the following instructions determines the bit - positions! */ - ctype_class_newP (lr, ctype, "upper"); - ctype_class_newP (lr, ctype, "lower"); - ctype_class_newP (lr, ctype, "alpha"); - ctype_class_newP (lr, ctype, "digit"); - ctype_class_newP (lr, ctype, "xdigit"); - ctype_class_newP (lr, ctype, "space"); - ctype_class_newP (lr, ctype, "print"); - ctype_class_newP (lr, ctype, "graph"); - ctype_class_newP (lr, ctype, "blank"); - ctype_class_newP (lr, ctype, "cntrl"); - ctype_class_newP (lr, ctype, "punct"); - ctype_class_newP (lr, ctype, "alnum"); - - ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512; - ctype->class_collection - = (u_int32_t *) xmalloc (sizeof (unsigned long int) - * ctype->class_collection_max); - memset (ctype->class_collection, '\0', - sizeof (unsigned long int) * ctype->class_collection_max); - ctype->class_collection_act = 256; - - /* Fill character map information. */ - ctype->map_collection_nr = 0; - ctype->last_map_idx = MAX_NR_CHARMAP; - ctype->from_map_char = ILLEGAL_CHAR_VALUE; - ctype_map_newP (lr, ctype, "toupper", charset); - ctype_map_newP (lr, ctype, "tolower", charset); - - /* Fill first 256 entries in `toupper' and `tolower' arrays. */ - for (cnt = 0; cnt < 256; ++cnt) + if (!ignore_content) { - ctype->map_collection[0][cnt] = cnt; - ctype->map_collection[1][cnt] = cnt; + /* Allocate the needed room. */ + locale->categories[LC_CTYPE].ctype = ctype = + (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t)); + + /* We have seen no names yet. */ + ctype->charnames_max = charmap->mb_cur_max == 1 ? 
256 : 512; + ctype->charnames = + (unsigned int *) xmalloc (ctype->charnames_max + * sizeof (unsigned int)); + for (cnt = 0; cnt < 256; ++cnt) + ctype->charnames[cnt] = cnt; + ctype->charnames_act = 256; + + /* Fill character class information. */ + ctype->last_class_char = ILLEGAL_CHAR_VALUE; + /* The order of the following instructions determines the bit + positions! */ + ctype_class_new (lr, ctype, "upper"); + ctype_class_new (lr, ctype, "lower"); + ctype_class_new (lr, ctype, "alpha"); + ctype_class_new (lr, ctype, "digit"); + ctype_class_new (lr, ctype, "xdigit"); + ctype_class_new (lr, ctype, "space"); + ctype_class_new (lr, ctype, "print"); + ctype_class_new (lr, ctype, "graph"); + ctype_class_new (lr, ctype, "blank"); + ctype_class_new (lr, ctype, "cntrl"); + ctype_class_new (lr, ctype, "punct"); + ctype_class_new (lr, ctype, "alnum"); + /* The following are extensions from ISO 14652. */ + ctype_class_new (lr, ctype, "left_to_right"); + ctype_class_new (lr, ctype, "right_to_left"); + ctype_class_new (lr, ctype, "num_terminator"); + ctype_class_new (lr, ctype, "num_separator"); + ctype_class_new (lr, ctype, "segment_separator"); + ctype_class_new (lr, ctype, "block_separator"); + ctype_class_new (lr, ctype, "direction_control"); + ctype_class_new (lr, ctype, "sym_swap_layout"); + ctype_class_new (lr, ctype, "char_shape_selector"); + ctype_class_new (lr, ctype, "num_shape_selector"); + ctype_class_new (lr, ctype, "non_spacing"); + ctype_class_new (lr, ctype, "non_spacing_level3"); + ctype_class_new (lr, ctype, "normal_connect"); + ctype_class_new (lr, ctype, "r_connect"); + ctype_class_new (lr, ctype, "no_connect"); + ctype_class_new (lr, ctype, "no_connect-space"); + ctype_class_new (lr, ctype, "vowel_connect"); + + ctype->class_collection_max = charmap->mb_cur_max == 1 ? 
256 : 512; + ctype->class_collection + = (uint32_t *) xcalloc (sizeof (unsigned long int), + ctype->class_collection_max); + ctype->class_collection_act = 256; + + /* Fill character map information. */ + ctype->map_collection_nr = 0; + ctype->last_map_idx = MAX_NR_CHARMAP; + ctype_map_new (lr, ctype, "toupper", charmap); + ctype_map_new (lr, ctype, "tolower", charmap); + ctype_map_new (lr, ctype, "tosymmetric", charmap); + + /* Fill first 256 entries in `toXXX' arrays. */ + for (cnt = 0; cnt < 256; ++cnt) + { + ctype->map_collection[0][cnt] = cnt; + ctype->map_collection[1][cnt] = cnt; + ctype->map_collection[2][cnt] = cnt; + ctype->map256_collection[0][cnt] = cnt; + ctype->map256_collection[1][cnt] = cnt; + } + + obstack_init (&ctype->mem_pool); } } void -ctype_finish (struct localedef_t *locale, struct charset_t *charset) +ctype_finish (struct localedef_t *locale, struct charmap_t *charmap) { /* See POSIX.2, table 2-6 for the meaning of the following table. */ #define NCLASS 12 @@ -226,106 +317,138 @@ ctype_finish (struct localedef_t *locale, struct charset_t *charset) }; size_t cnt; int cls1, cls2; - unsigned int space_value; + uint32_t space_value; + struct charseq *space_seq; struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + int warned; /* Set default value for classes not specified. */ - set_class_defaults (ctype, charset); + set_class_defaults (ctype, charmap, ctype->repertoire); /* Check according to table. 
*/ for (cnt = 0; cnt < ctype->class_collection_max; ++cnt) { - unsigned long int tmp; + uint32_t tmp = ctype->class_collection[cnt]; - tmp = ctype->class_collection[cnt]; - if (tmp == 0) - continue; - - for (cls1 = 0; cls1 < NCLASS; ++cls1) - if ((tmp & (1 << cls1)) != 0) - for (cls2 = 0; cls2 < NCLASS; ++cls2) - if (valid_table[cls1].allow[cls2] != '-') - { - int eq = (tmp & (1 << cls2)) != 0; - switch (valid_table[cls1].allow[cls2]) + if (tmp != 0) + { + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & _ISwbit (cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') { - case 'M': - if (!eq) + int eq = (tmp & _ISwbit (cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) { - char buf[17]; - char *cp = buf; - unsigned int value; - - value = ctype->charnames[cnt]; - - if ((value & 0xff000000) != 0) - cp += sprintf (cp, "\\%o", (value >> 24) & 0xff); - if ((value & 0xffff0000) != 0) - cp += sprintf (cp, "\\%o", (value >> 16) & 0xff); - if ((value & 0xffffff00) != 0) - cp += sprintf (cp, "\\%o", (value >> 8) & 0xff); - sprintf (cp, "\\%o", value & 0xff); - - if (!be_quiet) - error (0, 0, _("\ -character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "", - buf, valid_table[cls1].name, - valid_table[cls2].name); + case 'M': + if (!eq) + { + uint32_t value = ctype->charnames[cnt]; + + if (!be_quiet) + error (0, 0, _("\ +character L'\\u%0*x' in class `%s' must be in class `%s'"), + value > 0xffff ? 8 : 4, value, + valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'X': + if (eq) + { + uint32_t value = ctype->charnames[cnt]; + + if (!be_quiet) + error (0, 0, _("\ +character L'\\u%0*x' in class `%s' must not be in class `%s'"), + value > 0xffff ? 
8 : 4, value, + valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'D': + ctype->class_collection[cnt] |= _ISwbit (cls2); + break; + + default: + error (5, 0, _("internal error in %s, line %u"), + __FUNCTION__, __LINE__); } - break; + } + } + } + + for (cnt = 0; cnt < 256; ++cnt) + { + uint32_t tmp = ctype->class256_collection[cnt]; - case 'X': - if (eq) + if (tmp != 0) + { + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & _ISbit (cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') + { + int eq = (tmp & _ISbit (cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) { - char buf[17]; - char *cp = buf; - unsigned int value; - - value = ctype->charnames[cnt]; - - if ((value & 0xff000000) != 0) - cp += sprintf (cp, "\\%o", value >> 24); - if ((value & 0xffff0000) != 0) - cp += sprintf (cp, "\\%o", (value >> 16) & 0xff); - if ((value & 0xffffff00) != 0) - cp += sprintf (cp, "\\%o", (value >> 8) & 0xff); - sprintf (cp, "\\%o", value & 0xff); - - if (!be_quiet) - error (0, 0, _("\ -character %s'%s' in class `%s' must not be in class `%s'"), - value > 256 ? 
"L" : "", buf, - valid_table[cls1].name, - valid_table[cls2].name); + case 'M': + if (!eq) + { + char buf[17]; + + sprintf (buf, "\\%o", cnt); + + if (!be_quiet) + error (0, 0, _("\ +character '%s' in class `%s' must be in class `%s'"), + buf, valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'X': + if (eq) + { + char buf[17]; + + sprintf (buf, "\\%o", cnt); + + if (!be_quiet) + error (0, 0, _("\ +character '%s' in class `%s' must not be in class `%s'"), + buf, valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'D': + ctype->class256_collection[cnt] |= _ISbit (cls2); + break; + + default: + error (5, 0, _("internal error in %s, line %u"), + __FUNCTION__, __LINE__); } - break; - - case 'D': - ctype->class_collection[cnt] |= 1 << cls2; - break; - - default: - error (5, 0, _("internal error in %s, line %u"), - __FUNCTION__, __LINE__); - } - } + } + } } /* ... and now test <SP> as a special case. */ - space_value = charset_find_value (&charset->char_table, "SP", 2); - if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE) - space_value = charset_find_value (&charset->char_table, "space", 5); - if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE) + space_value = repertoire_find_value (ctype->repertoire, "SP", 2); + if (space_value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("character <SP> not defined in character map")); } else if (((cnt = BITPOS (tok_space), (ELEM (ctype, class_collection, , space_value) - & BIT (tok_space)) == 0) + & BITw (tok_space)) == 0) || (cnt = BITPOS (tok_blank), (ELEM (ctype, class_collection, , space_value) - & BIT (tok_blank)) == 0))) + & BITw (tok_blank)) == 0))) { if (!be_quiet) error (0, 0, _("<SP> character not in class `%s'"), @@ -333,10 +456,10 @@ character %s'%s' in class `%s' must not be in class `%s'"), } else if (((cnt = BITPOS (tok_punct), (ELEM (ctype, class_collection, , space_value) - & BIT (tok_punct)) != 0) + & BITw (tok_punct)) != 0) || (cnt = BITPOS (tok_graph), (ELEM (ctype, 
class_collection, , space_value) - & BIT (tok_graph)) + & BITw (tok_graph)) != 0))) { if (!be_quiet) @@ -344,24 +467,205 @@ character %s'%s' in class `%s' must not be in class `%s'"), valid_table[cnt].name); } else - ELEM (ctype, class_collection, , space_value) |= BIT (tok_print); + ELEM (ctype, class_collection, , space_value) |= BITw (tok_print); + + space_seq = charmap_find_value (charmap, "SP", 2); + if (space_seq == NULL || space_seq->nbytes != 1) + { + if (!be_quiet) + error (0, 0, _("character <SP> not defined in character map")); + } + else if (((cnt = BITPOS (tok_space), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_space)) == 0) + || (cnt = BITPOS (tok_blank), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_blank)) == 0))) + { + if (!be_quiet) + error (0, 0, _("<SP> character not in class `%s'"), + valid_table[cnt].name); + } + else if (((cnt = BITPOS (tok_punct), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_punct)) != 0) + || (cnt = BITPOS (tok_graph), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_graph)) != 0))) + { + if (!be_quiet) + error (0, 0, _("<SP> character must not be in class `%s'"), + valid_table[cnt].name); + } + else + ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print); /* Now that the tests are done make sure the name array contains all characters which are handled in the WIDTH section of the character set definition file. */ - if (charset->width_rules != NULL) - for (cnt = 0; cnt < charset->nwidth_rules; ++cnt) + if (charmap->width_rules != NULL) + for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) { +#if 0 size_t inner; - for (inner = charset->width_rules[cnt].from; - inner <= charset->width_rules[cnt].to; ++inner) + for (inner = charmap->width_rules[cnt].from; + inner <= charmap->width_rules[cnt].to; ++inner) (void) find_idx (ctype, NULL, NULL, NULL, inner); +#else + /* XXX Handle width. 
We must convert from the charseq to the + repertoire value */ + abort (); +#endif + } + + /* There must be a multiple of 10 digits. */ + if (ctype->mbdigits_act % 10 != 0) + { + assert (ctype->mbdigits_act == ctype->wcdigits_act); + ctype->wcdigits_act -= ctype->mbdigits_act % 10; + ctype->mbdigits_act -= ctype->mbdigits_act % 10; + error (0, 0, _("`digit' category has not entries in groups of ten")); + } + + /* Check the input digits. There must be a multiple of ten available. + In each group I could be that one or the other character is missing. + In this case the whole group must be removed. */ + cnt = 0; + while (cnt < ctype->mbdigits_act) + { + size_t inner; + for (inner = 0; inner < 10; ++inner) + if (ctype->mbdigits[cnt + inner] == NULL) + break; + + if (inner == 10) + cnt += 10; + else + { + /* Remove the group. */ + memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10], + ((ctype->wcdigits_act - cnt - 10) + * sizeof (ctype->mbdigits[0]))); + ctype->mbdigits_act -= 10; + } + } + + /* If no input digits are given use the default. */ + if (ctype->mbdigits_act == 0) + { + if (ctype->mbdigits_max == 0) + { + ctype->mbdigits = obstack_alloc (&charmap->mem_pool, + 10 * sizeof (struct charseq *)); + ctype->mbdigits_max = 10; + } + + for (cnt = 0; cnt < 10; ++cnt) + { + ctype->mbdigits[cnt] = charmap_find_symbol (charmap, + digits + cnt, 1); + if (ctype->mbdigits[cnt] == NULL) + { + ctype->mbdigits[cnt] = charmap_find_symbol (charmap, + longnames[cnt], + strlen (longnames[cnt])); + if (ctype->mbdigits[cnt] == NULL) + { + /* Hum, this ain't good. */ + error (0, 0, _("\ +no input digits defined and none of the standard names in the charmap")); + + ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool, + sizeof (struct charseq) + 1); + + /* This is better than nothing. */ + ctype->mbdigits[cnt]->bytes[0] = digits[cnt]; + ctype->mbdigits[cnt]->nbytes = 1; + } + } + } + + ctype->mbdigits_act = 10; + } + + /* Check the wide character input digits. 
There must be a multiple + of ten available. In each group I could be that one or the other + character is missing. In this case the whole group must be + removed. */ + cnt = 0; + while (cnt < ctype->wcdigits_act) + { + size_t inner; + for (inner = 0; inner < 10; ++inner) + if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE) + break; + + if (inner == 10) + cnt += 10; + else + { + /* Remove the group. */ + memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10], + ((ctype->wcdigits_act - cnt - 10) + * sizeof (ctype->wcdigits[0]))); + ctype->wcdigits_act -= 10; + } + } + + /* If no input digits are given use the default. */ + if (ctype->wcdigits_act == 0) + { + if (ctype->wcdigits_max == 0) + { + ctype->wcdigits = obstack_alloc (&charmap->mem_pool, + 10 * sizeof (uint32_t)); + ctype->wcdigits_max = 10; + } + + for (cnt = 0; cnt < 10; ++cnt) + ctype->wcdigits[cnt] = L'0' + cnt; + + ctype->mbdigits_act = 10; + } + + /* Check the outdigits. */ + warned = 0; + for (cnt = 0; cnt < 10; ++cnt) + if (ctype->mboutdigits[cnt] == NULL) + { + static struct charseq replace[2]; + + if (!warned) + { + error (0, 0, _("\ +not all characters used in `outdigit' are available in the charmap")); + warned = 1; + } + + replace[0].nbytes = 1; + replace[0].bytes[0] = '?'; + replace[0].bytes[1] = '\0'; + ctype->mboutdigits[cnt] = &replace[0]; + } + + warned = 0; + for (cnt = 0; cnt < 10; ++cnt) + if (ctype->wcoutdigits[cnt] == 0) + { + if (!warned) + { + error (0, 0, _("\ +not all characters used in `outdigit' are available in the repertoire")); + warned = 1; + } + + ctype->wcoutdigits[cnt] = L'?'; } } void -ctype_output (struct localedef_t *locale, struct charset_t *charset, +ctype_output (struct localedef_t *locale, struct charmap_t *charmap, const char *output_path) { struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; @@ -370,23 +674,12 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, struct iovec iov[2 + nelems + ctype->nr_charclass + 
ctype->map_collection_nr]; struct locale_file data; - u_int32_t idx[nelems]; + uint32_t idx[nelems + 1]; size_t elem, cnt, offset, total; - - - if ((locale->binary & (1 << LC_CTYPE)) != 0) - { - iov[0].iov_base = ctype; - iov[0].iov_len = locale->len[LC_CTYPE]; - - write_locale_data (output_path, "LC_CTYPE", 1, iov); - - return; - } - + char *cp; /* Now prepare the output: Find the sizes of the table we can use. */ - allocate_arrays (ctype, charset); + allocate_arrays (ctype, charmap, ctype->repertoire); data.magic = LIMAGIC (LC_CTYPE); data.n = nelems; @@ -419,20 +712,20 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, CTYPE_DATA (_NL_CTYPE_TOUPPER_EB, ctype->map_eb[0], (ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_TOLOWER_EB, ctype->map_eb[1], (ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_TOUPPER_EL, ctype->map_el[0], (ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_TOLOWER_EL, ctype->map_el[1], (ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_CLASS32, ctype->ctype32_b, @@ -441,15 +734,88 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, CTYPE_DATA (_NL_CTYPE_NAMES_EB, ctype->names_eb, (ctype->plane_size * ctype->plane_cnt - * sizeof (u_int32_t))); + * sizeof (uint32_t))); CTYPE_DATA (_NL_CTYPE_NAMES_EL, ctype->names_el, (ctype->plane_size * ctype->plane_cnt - * sizeof (u_int32_t))); - - CTYPE_DATA (_NL_CTYPE_HASH_SIZE, - &ctype->plane_size, sizeof (u_int32_t)); - CTYPE_DATA (_NL_CTYPE_HASH_LAYERS, - &ctype->plane_cnt, sizeof (u_int32_t)); + * sizeof (uint32_t))); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE_EB, + &ctype->translit_hash_size_eb, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE_EL, + &ctype->translit_hash_size_el, sizeof 
(uint32_t)); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS_EB, + &ctype->translit_hash_layers_eb, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS_EL, + &ctype->translit_hash_layers_el, sizeof (uint32_t)); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX_EB, + ctype->translit_from_idx_eb, + ctype->translit_idx_size); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX_EL, + ctype->translit_from_idx_el, + ctype->translit_idx_size); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL_EB, + ctype->translit_from_tbl_eb, + ctype->translit_from_tbl_size); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL_EL, + ctype->translit_from_tbl_el, + ctype->translit_from_tbl_size); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX_EB, + ctype->translit_to_idx_eb, + ctype->translit_idx_size); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX_EL, + ctype->translit_to_idx_el, + ctype->translit_idx_size); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL_EB, + ctype->translit_to_tbl_eb, ctype->translit_to_tbl_size); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL_EL, + ctype->translit_to_tbl_el, ctype->translit_to_tbl_size); + +#if __BYTE_ORDER == __BIG_ENDIAN + CTYPE_DATA (_NL_CTYPE_HASH_SIZE_EB, + &ctype->plane_size, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_HASH_LAYERS_EB, + &ctype->plane_cnt, sizeof (uint32_t)); +#else + case _NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE_EB): + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->plane_size); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; + case _NL_ITEM_INDEX (_NL_CTYPE_HASH_LAYERS_EB): + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->plane_cnt); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; +#endif +#if __BYTE_ORDER == __BIG_ENDIAN + CTYPE_DATA (_NL_CTYPE_HASH_SIZE_EL, + &ctype->plane_size, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_HASH_LAYERS_EL, + 
&ctype->plane_cnt, sizeof (uint32_t)); +#else + case _NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE_EL): + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->plane_size); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; + case _NL_ITEM_INDEX (_NL_CTYPE_HASH_LAYERS_EL): + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->plane_cnt); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; +#endif case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES): /* The class name array. */ @@ -466,8 +832,7 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4)); total += 1 + (4 - ((total + 1) % 4)); - if (elem + 1 < nelems) - idx[elem + 1] = idx[elem] + total; + idx[elem + 1] = idx[elem] + total; break; case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES): @@ -485,15 +850,14 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4)); total += 1 + (4 - ((total + 1) % 4)); - if (elem + 1 < nelems) - idx[elem + 1] = idx[elem] + total; + idx[elem + 1] = idx[elem] + total; break; CTYPE_DATA (_NL_CTYPE_WIDTH, ctype->width, ctype->plane_size * ctype->plane_cnt); CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX, - &ctype->mb_cur_max, sizeof (u_int32_t)); + &ctype->mb_cur_max, sizeof (uint32_t)); case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME): total = strlen (ctype->codeset_name) + 1; @@ -508,8 +872,127 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, total = (total + 3) & ~3; } iov[2 + elem + offset].iov_len = total; - if (elem + 1 < nelems) - idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EB): + case _NL_ITEM_INDEX 
(_NL_CTYPE_INDIGITS_MB_LEN_EL): + iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + if ((elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EB) + && __BYTE_ORDER == __BIG_ENDIAN) + || (elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EL) + && __BYTE_ORDER == __LITTLE_ENDIAN)) + *(uint32_t *) iov[2 + elem + offset].iov_base = + ctype->mbdigits_act / 10; + else + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->mbdigits_act / 10); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EB): + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EL): + iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + if ((elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EB) + && __BYTE_ORDER == __BIG_ENDIAN) + || (elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EL) + && __BYTE_ORDER == __LITTLE_ENDIAN)) + *(uint32_t *) iov[2 + elem + offset].iov_base = + ctype->wcdigits_act / 10; + else + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->wcdigits_act / 10); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB): + /* Compute the length of all possible characters. For INDIGITS + there might be more than one. We simply concatenate all of + them with a NUL byte following. The NUL byte wouldn't be + necessary but it makes it easier for the user. 
*/ + total = 0; + for (cnt = elem - _NL_CTYPE_INDIGITS0_MB; + cnt < ctype->mbdigits_act; cnt += 10) + total += ctype->mbdigits[cnt]->nbytes + 1; + iov[2 + elem + offset].iov_base = (char *) alloca (total); + iov[2 + elem + offset].iov_len = total; + + cp = iov[2 + elem + offset].iov_base; + for (cnt = elem - _NL_CTYPE_INDIGITS0_MB; + cnt < ctype->mbdigits_act; cnt += 10) + { + cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes, + ctype->mbdigits[cnt]->nbytes); + *cp++ = '\0'; + } + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB): + /* Compute the length of all possible characters. For INDIGITS + there might be more than one. We simply concatenate all of + them with a NUL byte following. The NUL byte wouldn't be + necessary but it makes it easier for the user. */ + cnt = elem - _NL_CTYPE_OUTDIGIT0_MB; + total = ctype->mboutdigits[cnt]->nbytes + 1; + iov[2 + elem + offset].iov_base = (char *) alloca (total); + iov[2 + elem + offset].iov_len = total; + + *(char *) mempcpy (iov[2 + elem + offset].iov_base, + ctype->mbdigits[cnt]->bytes, + ctype->mbdigits[cnt]->nbytes) = '\0'; + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC_EB): + total = ctype->wcdigits_act / 10; + + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (total * sizeof (uint32_t)); + iov[2 + elem + offset].iov_len = total * sizeof (uint32_t); + + for (cnt = elem - _NL_CTYPE_INDIGITS0_WC_EB; + cnt < ctype->wcdigits_act; cnt += 10) + ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10] + = (__BYTE_ORDER == __LITTLE_ENDIAN + ? bswap_32 (ctype->wcdigits[cnt]) : ctype->wcdigits[cnt]); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC_EL) ... 
_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC_EL): + total = ctype->wcdigits_act / 10; + + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (total * sizeof (uint32_t)); + iov[2 + elem + offset].iov_len = total * sizeof (uint32_t); + + for (cnt = elem - _NL_CTYPE_INDIGITS0_WC_EL; + cnt < ctype->wcdigits_act; cnt += 10) + ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10] + = (__BYTE_ORDER == __BIG_ENDIAN + ? bswap_32 (ctype->wcdigits[cnt]) : ctype->wcdigits[cnt]); + break; + +#if __BYTE_ORDER == __BIG_ENDIAN + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EB): + cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EB; +#else + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EL): + cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EL; +#endif + iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt]; + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; + +#if __BYTE_ORDER == __LITTLE_ENDIAN + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EB): + cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EB; +#else + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EL) ... 
_NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EL): + cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EL; +#endif + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->wcoutdigits[cnt]); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); break; default: @@ -527,10 +1010,9 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, iov[2 + elem + offset].iov_len = ((ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); - if (elem + 1 < nelems) - idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; } } @@ -541,596 +1023,1575 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, } -/* Character class handling. */ -void -ctype_class_new (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, struct token *code, - struct charset_t *charset) +/* Local functions. */ +static void +ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name) { - ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype, - code->val.str.start); + size_t cnt; + + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], name) == 0) + break; + + if (cnt < ctype->nr_charclass) + { + lr_error (lr, _("character class `%s' already defined"), name); + return; + } + + if (ctype->nr_charclass == MAX_NR_CHARCLASS) + /* Exit code 2 is prescribed in P1003.2b. 
*/ + error (2, 0, _("\ +implementation limit: no more than %d character classes allowed"), + MAX_NR_CHARCLASS); + + ctype->classnames[ctype->nr_charclass++] = name; } -int -ctype_is_charclass (struct linereader *lr, struct localedef_t *locale, - const char *name) +static void +ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name, struct charmap_t *charmap) { + size_t max_chars = 0; size_t cnt; - for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt) - if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt]) - == 0) - return 1; + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + { + if (strcmp (ctype->mapnames[cnt], name) == 0) + break; - return 0; + if (max_chars < ctype->map_collection_max[cnt]) + max_chars = ctype->map_collection_max[cnt]; + } + + if (cnt < ctype->map_collection_nr) + { + lr_error (lr, _("character map `%s' already defined"), name); + return; + } + + if (ctype->map_collection_nr == MAX_NR_CHARMAP) + /* Exit code 2 is prescribed in P1003.2b. */ + error (2, 0, _("\ +implementation limit: no more than %d character maps allowed"), + MAX_NR_CHARMAP); + + ctype->mapnames[cnt] = name; + + if (max_chars == 0) + ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512; + else + ctype->map_collection_max[cnt] = max_chars; + + ctype->map_collection[cnt] = (uint32_t *) + xmalloc (sizeof (uint32_t) * ctype->map_collection_max[cnt]); + memset (ctype->map_collection[cnt], '\0', + sizeof (uint32_t) * ctype->map_collection_max[cnt]); + ctype->map_collection_act[cnt] = 256; + + ++ctype->map_collection_nr; } -void -ctype_class_start (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, const char *str, - struct charset_t *charset) +/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This + is possible if we only want ot extend the name array. 
*/ +static uint32_t * +find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, + size_t *act, uint32_t idx) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; size_t cnt; - switch (tok) - { - case tok_upper: - str = "upper"; - break; - case tok_lower: - str = "lower"; - break; - case tok_alpha: - str = "alpha"; - break; - case tok_digit: - str = "digit"; - break; - case tok_xdigit: - str = "xdigit"; - break; - case tok_space: - str = "space"; - break; - case tok_print: - str = "print"; - break; - case tok_graph: - str = "graph"; - break; - case tok_blank: - str = "blank"; - break; - case tok_cntrl: - str = "cntrl"; - break; - case tok_punct: - str = "punct"; - break; - case tok_alnum: - str = "alnum"; - break; - case tok_ident: + if (idx < 256) + return table == NULL ? NULL : &(*table)[idx]; + + for (cnt = 256; cnt < ctype->charnames_act; ++cnt) + if (ctype->charnames[cnt] == idx) break; - default: - assert (! "illegal token as class name: should not happen"); + + /* We have to distinguish two cases: the name is found or not. */ + if (cnt == ctype->charnames_act) + { + /* Extend the name array. */ + if (ctype->charnames_act == ctype->charnames_max) + { + ctype->charnames_max *= 2; + ctype->charnames = (unsigned int *) + xrealloc (ctype->charnames, + sizeof (unsigned int) * ctype->charnames_max); + } + ctype->charnames[ctype->charnames_act++] = idx; } - for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) - if (strcmp (str, ctype->classnames[cnt]) == 0) - break; + if (table == NULL) + /* We have done everything we are asked to do. */ + return NULL; + + if (cnt >= *act) + { + if (cnt >= *max) + { + size_t old_max = *max; + do + *max *= 2; + while (*max <= cnt); - if (cnt >= ctype->nr_charclass) - assert (! 
"unknown class in class definition: should not happen"); + *table = + (uint32_t *) xrealloc (*table, *max * sizeof (unsigned long int)); + memset (&(*table)[old_max], '\0', + (*max - old_max) * sizeof (uint32_t)); + } - ctype->class_done |= BIT (tok); + *act = cnt; + } - ctype->current_class_mask = 1 << cnt; - ctype->last_class_char = ILLEGAL_CHAR_VALUE; + return &(*table)[cnt]; } -void -ctype_class_from (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +static int +get_character (struct token *now, struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct charseq **seqp, uint32_t *wchp) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - unsigned int value; + if (now->tok == tok_bsymbol) + { + /* This will hopefully be the normal case. */ + *wchp = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + *seqp = charmap_find_value (charmap, now->val.str.startmb, + now->val.str.lenmb); + } + else if (now->tok == tok_ucs4) + { + *seqp = repertoire_find_seq (repertoire, now->val.ucs4); - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + if (*seqp == NULL) + { + /* Compute the value in the charmap from the UCS value. */ + const char *symbol = repertoire_find_symbol (repertoire, + now->val.ucs4); - ctype->last_class_char = value; + if (symbol == NULL) + *seqp = NULL; + else + *seqp = charmap_find_value (charmap, symbol, strlen (symbol)); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) - /* In the LC_CTYPE category it is no error when a character is - not found. This has to be ignored silently. */ - return; + if (*seqp == NULL) + { + /* Insert a negative entry. 
*/ + static const struct charseq negative + = { .ucs4 = ILLEGAL_CHAR_VALUE }; + uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4); + *newp = now->val.ucs4; + + insert_entry (&repertoire->seq_table, newp, 4, + (void *) &negative); + } + else + (*seqp)->ucs4 = now->val.ucs4; + } + else if ((*seqp)->ucs4 != now->val.ucs4) + *seqp = NULL; + + *wchp = now->val.ucs4; + } + else if (now->tok == tok_charcode) + { + /* We must map from the byte code to UCS4. */ + *seqp = charmap_find_symbol (charmap, now->val.str.startmb, + now->val.str.lenmb); - *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max, - &ctype->class_collection_act, value) - |= ctype->current_class_mask; + if (*seqp == NULL) + *wchp = ILLEGAL_CHAR_VALUE; + else + { + if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE) + (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name, + strlen ((*seqp)->name)); + *wchp = (*seqp)->ucs4; + } + } + else + return 1; + + return 0; } -void -ctype_class_to (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'. */ +static void +charclass_symbolic_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, + const char *last_str, + unsigned long int class256_bit, + unsigned long int class_bit, int base, + int ignore_content, int handle_digits) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - unsigned int value, cnt; + const char *nowstr = now->val.str.startmb; + char tmp[now->val.str.lenmb + 1]; + const char *cp; + char *endp; + unsigned long int from; + unsigned long int to; - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + /* We have to compute the ellipsis values using the symbolic names. 
*/ + assert (last_str != NULL); - /* In the LC_CTYPE category it is no error when a character is - not found. This has to be ignored silently. */ - if ((wchar_t) ctype->last_class_char != ILLEGAL_CHAR_VALUE - && (wchar_t) value != ILLEGAL_CHAR_VALUE) - for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt) - *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max, - &ctype->class_collection_act, cnt) - |= ctype->current_class_mask; + if (strlen (last_str) != now->val.str.lenmb) + { + invalid_range: + lr_error (ldfile, + _("`%s' and `%s' are no valid names for symbolic range"), + last_str, nowstr); + return; + } - ctype->last_class_char = ILLEGAL_CHAR_VALUE; -} + if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0) + /* Nothing to do, the names are the same. */ + return; + for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp) + ; -void -ctype_class_end (struct linereader *lr, struct localedef_t *locale) -{ - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + errno = 0; + from = strtoul (cp, &endp, base); + if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0') + goto invalid_range; - /* We have no special actions to perform here. */ - ctype->current_class_mask = 0; - ctype->last_class_char = ILLEGAL_CHAR_VALUE; -} + to = strtoul (nowstr + (cp - last_str), &endp, base); + if ((to == UINT_MAX && errno == ERANGE) || *endp != '\0' || from >= to) + goto invalid_range; + /* OK, we have a range FROM - TO. Now we can create the symbolic names. */ + if (!ignore_content) + { + now->val.str.startmb = tmp; + while (++from <= to) + { + struct charseq *seq; + uint32_t wch; -/* Character map handling. */ -void -ctype_map_new (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, struct token *code, - struct charset_t *charset) -{ - ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype, - code->val.str.start, charset); -} + sprintf (tmp, (base == 10 ? 
"%.*s%0*d" : "%.*s%0*X"), cp - last_str, + last_str, now->val.str.lenmb - (cp - last_str), from); + get_character (now, charmap, repertoire, &seq, &wch); -int -ctype_is_charconv (struct linereader *lr, struct localedef_t *locale, - const char *name) -{ - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - size_t cnt; + if (seq != NULL && seq->nbytes == 1) + /* Yep, we can store information about this byte sequence. */ + ctype->class256_collection[seq->bytes[0]] |= class256_bit; - for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) - if (strcmp (name, ctype->mapnames[cnt]) == 0) - return 1; + if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) + /* We have the UCS4 position. */ + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; - return 0; + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + } } -void -ctype_map_start (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, const char *name, struct charset_t *charset) +/* Ellipsis like in `<U1234>..<U2345>'. 
*/ +static void +charclass_ucs4_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, uint32_t last_wch, + unsigned long int class256_bit, + unsigned long int class_bit, int ignore_content, + int handle_digits) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - size_t cnt; - - switch (tok) + if (last_wch > now->val.ucs4) { - case tok_toupper: - ctype->toupper_done = 1; - name = "toupper"; - break; - case tok_tolower: - ctype->tolower_done = 1; - name = "tolower"; - break; - case tok_ident: - break; - default: - assert (! "unknown token in category `LC_CTYPE' should not happen"); + lr_error (ldfile, _("\ +to-value <U%0*X> of range is smaller than from-value <U%0*X>"), + (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4, + (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch); + return; } - for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) - if (strcmp (name, ctype->mapnames[cnt]) == 0) - break; + if (!ignore_content) + while (++last_wch <= now->val.ucs4) + { + /* We have to find out whether there is a byte sequence corresponding + to this UCS4 value. */ + struct charseq *seq = repertoire_find_seq (repertoire, last_wch); - if (cnt == ctype->map_collection_nr) - assert (! "unknown token in category `LC_CTYPE' should not happen"); + /* If this is the first time we look for this sequence create a new + entry. */ + if (seq == NULL) + { + /* Find the symbolic name for this UCS4 value. */ + const char *symbol = repertoire_find_symbol (repertoire, last_wch); + uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4); + *newp = last_wch; - ctype->last_map_idx = cnt; - ctype->from_map_char = ILLEGAL_CHAR_VALUE; -} + if (symbol != NULL) + /* We have a name, now search the multibyte value. */ + seq = charmap_find_value (charmap, symbol, strlen (symbol)); + if (seq == NULL) + { + /* We have to create a fake entry. 
*/ + static const struct charseq negative + = { .ucs4 = ILLEGAL_CHAR_VALUE }; + seq = (struct charseq *) &negative; + } + else + seq->ucs4 = last_wch; -void -ctype_map_from (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) -{ - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - unsigned int value; + insert_entry (&repertoire->seq_table, newp, 4, seq); + } - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + /* We have a name, now search the multibyte value. */ + if (seq->ucs4 == last_wch && seq->nbytes == 1) + /* Yep, we can store information about this byte sequence. */ + ctype->class256_collection[(size_t) seq->bytes[0]] + |= class256_bit; - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) - /* In the LC_CTYPE category it is no error when a character is - not found. This has to be ignored silently. */ - return; + /* And of course we have the UCS4 position. */ + if (class_bit != 0 && class_bit != 0) + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, last_wch) |= class_bit; - assert (ctype->last_map_idx < ctype->map_collection_nr); + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch + ? seq : NULL); + ctype->wcdigits[ctype->wcdigits_act++] = last_wch; + } + else if (handle_digits == 2) + { + /* We must store the digit values. 
*/ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } - ctype->from_map_char = value; + ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch + ? seq : NULL); + ctype->wcoutdigits[ctype->outdigits_act] = last_wch; + ++ctype->outdigits_act; + } + } } -void -ctype_map_to (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +/* Ellipsis as in `/xea/x12.../xea/x34'. */ +static void +charclass_charcode_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, char *last_charcode, + uint32_t last_charcode_len, + unsigned long int class256_bit, + unsigned long int class_bit, int ignore_content, + int handle_digits) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - unsigned int value; - - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + /* First check whether the to-value is larger. */ + if (now->val.charcode.nbytes != last_charcode_len) + { + lr_error (ldfile, _("\ +start end end character sequence of range must have the same length")); + return; + } - if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE - || (wchar_t) value == ILLEGAL_CHAR_VALUE) + if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0) { - /* In the LC_CTYPE category it is no error when a character is - not found. This has to be ignored silently. 
*/ - ctype->from_map_char = ILLEGAL_CHAR_VALUE; + lr_error (ldfile, _("\ +to-value character sequence is smaller than from-value sequence")); return; } - *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx], - &ctype->map_collection_max[ctype->last_map_idx], - &ctype->map_collection_act[ctype->last_map_idx], - ctype->from_map_char) = value; + if (!ignore_content) + { + do + { + /* Increment the byte sequence value. */ + struct charseq *seq; + uint32_t wch; + int i; + + for (i = last_charcode_len - 1; i >= 0; --i) + if (++last_charcode[i] != 0) + break; + + if (last_charcode_len == 1) + /* Of course we have the charcode value. */ + ctype->class256_collection[(size_t) last_charcode[0]] + |= class256_bit; + + /* Find the symbolic name. */ + seq = charmap_find_symbol (charmap, last_charcode, + last_charcode_len); + if (seq != NULL) + { + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + wch = seq->ucs4; + + if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + } + else + wch = ILLEGAL_CHAR_VALUE; - ctype->from_map_char = ILLEGAL_CHAR_VALUE; + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + seq = xmalloc (sizeof (struct charseq) + last_charcode_len); + memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); + seq->nbytes = last_charcode_len; + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (handle_digits == 2) + { + struct charseq *seq; + /* We must store the digit values. 
*/ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + seq = xmalloc (sizeof (struct charseq) + last_charcode_len); + memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); + seq->nbytes = last_charcode_len; + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + while (memcmp (last_charcode, now->val.charcode.bytes, + last_charcode_len) != 0); + } } -void -ctype_map_end (struct linereader *lr, struct localedef_t *locale) +/* Read one transliteration entry. */ +static uint32_t * +read_widestring (struct linereader *ldfile, struct token *now, + struct charmap_t *charmap, struct repertoire_t *repertoire) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + uint32_t *wstr; + + if (now->tok == tok_default_missing) + /* The special name "" will denote this case. */ + wstr = (uint32_t *) L""; + else if (now->tok == tok_bsymbol) + { + /* Get the value from the repertoire. */ + wstr = xmalloc (2 * sizeof (uint32_t)); + wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + if (wstr[0] == ILLEGAL_CHAR_VALUE) + /* We cannot proceed, we don't know the UCS4 value. */ + return NULL; + + wstr[1] = 0; + } + else if (now->tok == tok_ucs4) + { + wstr = xmalloc (2 * sizeof (uint32_t)); + wstr[0] = now->val.ucs4; + wstr[1] = 0; + } + else if (now->tok == tok_charcode) + { + /* Argh, we have to convert to the symbol name first and then to the + UCS4 value. */ + struct charseq *seq = charmap_find_symbol (charmap, + now->val.str.startmb, + now->val.str.lenmb); + if (seq == NULL) + /* Cannot find the UCS4 value. 
*/ + return NULL; + + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + if (seq->ucs4 == ILLEGAL_CHAR_VALUE) + /* We cannot proceed, we don't know the UCS4 value. */ + return NULL; + + wstr = xmalloc (2 * sizeof (uint32_t)); + wstr[0] = seq->ucs4; + wstr[1] = 0; + } + else if (now->tok == tok_string) + { + wstr = now->val.str.startwc; + if (wstr[0] == 0) + return NULL; + } + else + { + if (now->tok != tok_eol && now->tok != tok_eof) + lr_ignore_rest (ldfile, 0); + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); + return (uint32_t *) -1l; + } - ctype->last_map_idx = MAX_NR_CHARMAP; - ctype->from_map_char = ILLEGAL_CHAR_VALUE; + return wstr; } -/* Local functions. */ static void -ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype, - const char *name) +read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype, + struct token *now, struct charmap_t *charmap, + struct repertoire_t *repertoire) { - size_t cnt; + uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire); + struct translit_t *result; + struct translit_to_t **top; + struct obstack *ob = &ctype->mem_pool; + int first; + int ignore; + + if (from_wstr == NULL) + /* There is no valid from string. */ + return; - for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) - if (strcmp (ctype->classnames[cnt], name) == 0) - break; + result = (struct translit_t *) obstack_alloc (ob, + sizeof (struct translit_t)); + result->from = from_wstr; + result->next = NULL; + result->to = NULL; + top = &result->to; + first = 1; + ignore = 0; - if (cnt < ctype->nr_charclass) + while (1) { - lr_error (lr, _("character class `%s' already defined"), name); - return; - } + uint32_t *to_wstr; - if (ctype->nr_charclass == MAX_NR_CHARCLASS) - /* Exit code 2 is prescribed in P1003.2b. 
*/ - error (2, 0, _("\ -implementation limit: no more than %d character classes allowed"), - MAX_NR_CHARCLASS); + /* Next we have one or more transliterations. They are + separated by semicolons. */ + now = lr_token (ldfile, charmap, repertoire); - ctype->classnames[ctype->nr_charclass++] = name; + if (!first && (now->tok == tok_semicolon || now->tok == tok_eol)) + { + /* One string read. */ + const uint32_t zero = 0; + + if (!ignore) + { + obstack_grow (ob, &zero, 4); + to_wstr = obstack_finish (ob); + + *top = obstack_alloc (ob, sizeof (struct translit_to_t)); + (*top)->str = to_wstr; + (*top)->next = NULL; + } + + if (now->tok == tok_eol) + { + result->next = ctype->translit; + ctype->translit = result; + return; + } + + if (!ignore) + top = &(*top)->next; + ignore = 0; + } + else + { + to_wstr = read_widestring (ldfile, now, charmap, repertoire); + if (to_wstr == (uint32_t *) -1l) + { + /* An error occurred. */ + obstack_free (ob, result); + return; + } + + if (to_wstr == NULL) + ignore = 1; + else + /* This value is usable. */ + obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4); + + first = 0; + } + } } -static void -ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype, - const char *name, struct charset_t *charset) +/* The parser for the LC_CTYPE section of the locale definition. 
*/ +void +ctype_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) { - size_t max_chars = 0; + struct repertoire_t *repertoire = NULL; + struct locale_ctype_t *ctype; + struct token *now; + enum token_t nowtok; size_t cnt; + struct charseq *last_seq; + uint32_t last_wch = 0; + enum token_t last_token; + enum token_t ellipsis_token; + char last_charcode[16]; + size_t last_charcode_len = 0; + const char *last_str = NULL; + int mapidx; - for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) - { - if (strcmp (ctype->mapnames[cnt], name) == 0) - break; + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); - if (max_chars < ctype->map_collection_max[cnt]) - max_chars = ctype->map_collection_max[cnt]; + /* The rest of the line containing `LC_CTYPE' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } + while (nowtok == tok_eol); - if (cnt < ctype->map_collection_nr) + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) { - lr_error (lr, _("character map `%s' already defined"), name); + handle_copy (ldfile, charmap, repertoire, tok_lc_ctype, LC_CTYPE, + "LC_CTYPE", ignore_content); return; } - if (ctype->map_collection_nr == MAX_NR_CHARMAP) - /* Exit code 2 is prescribed in P1003.2b. */ - error (2, 0, _("\ -implementation limit: no more than %d character maps allowed"), - MAX_NR_CHARMAP); + /* Prepare the data structures. */ + ctype_startup (ldfile, result, charmap, ignore_content); + ctype = result->categories[LC_CTYPE].ctype; - ctype->mapnames[cnt] = name; + /* Remember the repertoire we use. */ + if (!ignore_content) + ctype->repertoire = repertoire; - if (max_chars == 0) - ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 
256 : 512; - else - ctype->map_collection_max[cnt] = max_chars; + while (1) + { + unsigned long int class_bit = 0; + unsigned long int class256_bit = 0; + int handle_digits = 0; - ctype->map_collection[cnt] = (u_int32_t *) - xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]); - memset (ctype->map_collection[cnt], '\0', - sizeof (u_int32_t) * ctype->map_collection_max[cnt]); - ctype->map_collection_act[cnt] = 256; + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; - ++ctype->map_collection_nr; -} + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + switch (nowtok) + { + case tok_class: + /* We simply forget the `class' keyword and use the following + operand to determine the bit. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_ident || now->tok == tok_string) + { + /* Must be one of the predefined class names. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0) + break; + if (cnt >= ctype->nr_charclass) + { + if (now->val.str.lenmb == 8 + && memcmp ("special1", now->val.str.startmb, 8) == 0) + class_bit = _ISwspecial1; + else if (now->val.str.lenmb == 8 + && memcmp ("special2", now->val.str.startmb, 8) == 0) + class_bit = _ISwspecial2; + else if (now->val.str.lenmb == 8 + && memcmp ("special3", now->val.str.startmb, 8) == 0) + class_bit = _ISwspecial3; + else + { + lr_error (ldfile, _("\ +unknown character class `%s' in category `LC_CTYPE'"), + now->val.str.startmb); + free (now->val.str.startmb); + + lr_ignore_rest (ldfile, 0); + continue; + } + } + else + class_bit = _ISwbit (cnt); + + free (now->val.str.startmb); + } + else if (now->tok == tok_digit) + goto handle_tok_digit; + else if (now->tok < tok_upper || now->tok > tok_blank) + goto err_label; + else + { + class_bit = BITw (now->tok); + class256_bit = BIT (now->tok); + } -/* We have to be 
prepared that TABLE, MAX, and ACT can be NULL. This - is possible if we only want to extend the name array. */ -static u_int32_t * -find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max, - size_t *act, unsigned int idx) -{ - size_t cnt; + /* The next character must be a semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + goto err_label; + goto read_charclass; + + case tok_upper: + case tok_lower: + case tok_alpha: + case tok_alnum: + case tok_space: + case tok_cntrl: + case tok_punct: + case tok_graph: + case tok_print: + case tok_xdigit: + case tok_blank: + class_bit = BITw (now->tok); + class256_bit = BIT (now->tok); + handle_digits = 0; + read_charclass: + ctype->class_done |= class_bit; + last_token = tok_none; + ellipsis_token = tok_none; + now = lr_token (ldfile, charmap, NULL); + while (now->tok != tok_eol && now->tok != tok_eof) + { + uint32_t wch; + struct charseq *seq; + + if (ellipsis_token == tok_none) + { + if (get_character (now, charmap, repertoire, &seq, &wch)) + goto err_label; + + if (!ignore_content && seq != NULL && seq->nbytes == 1) + /* Yep, we can store information about this byte + sequence. */ + ctype->class256_collection[seq->bytes[0]] |= class256_bit; + + if (!ignore_content && wch != ILLEGAL_CHAR_VALUE + && class_bit != 0) + /* We have the UCS4 position. */ + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + + last_token = now->tok; + last_str = now->val.str.startmb; + last_seq = seq; + last_wch = wch; + memcpy (last_charcode, now->val.charcode.bytes, 16); + last_charcode_len = now->val.charcode.nbytes; + + if (!ignore_content && handle_digits == 1) + { + /* We must store the digit values. 
*/ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (!ignore_content && handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + goto err_label; + } + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + else + { + /* Now it gets complicated. We have to resolve the + ellipsis problem. First we must distinguish between + the different kind of ellipsis and this must match the + tokens we have seen. */ + assert (last_token != tok_none); + + if (last_token != now->tok) + { + lr_error (ldfile, _("\ +ellipsis range must be marked by two operands of same type")); + lr_ignore_rest (ldfile, 0); + break; + } + + if (last_token == tok_bsymbol) + { + if (ellipsis_token == tok_ellipsis3) + lr_error (ldfile, _("with symbolic name range values \ +the absolute ellipsis `...' must not be used")); + + charclass_symbolic_ellipsis (ldfile, ctype, charmap, + repertoire, now, last_str, + class256_bit, class_bit, + (ellipsis_token + == tok_ellipsis4 + ? 
10 : 16), + ignore_content, + handle_digits); + } + else if (last_token == tok_ucs4) + { + if (ellipsis_token != tok_ellipsis2) + lr_error (ldfile, _("\ +with UCS range values one must use the hexadecimal symbolic ellipsis `..'")); + + charclass_ucs4_ellipsis (ldfile, ctype, charmap, + repertoire, now, last_wch, + class256_bit, class_bit, + ignore_content, handle_digits); + } + else + { + assert (last_token == tok_charcode); + + if (ellipsis_token != tok_ellipsis3) + lr_error (ldfile, _("\ +with character code range values one must use the absolute ellipsis `...'")); + + charclass_charcode_ellipsis (ldfile, ctype, charmap, + repertoire, now, + last_charcode, + last_charcode_len, + class256_bit, class_bit, + ignore_content, + handle_digits); + } + + /* Now we have used the last value. */ + last_token = tok_none; + } + + /* Next we expect a semicolon or the end of the line. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eol || now->tok == tok_eof) + break; + + if (last_token != tok_none + && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4) + { + ellipsis_token = now->tok; + now = lr_token (ldfile, charmap, NULL); + continue; + } + + if (now->tok != tok_semicolon) + goto err_label; + + /* And get the next character. */ + now = lr_token (ldfile, charmap, NULL); + + ellipsis_token = tok_none; + } + break; + + case tok_digit: + handle_tok_digit: + class_bit = _ISwdigit; + class256_bit = _ISdigit; + handle_digits = 1; + goto read_charclass; + + case tok_outdigit: + if (ctype->outdigits_act != 0) + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), + "LC_CTYPE", "outdigit"); + class_bit = 0; + class256_bit = 0; + handle_digits = 2; + goto read_charclass; + + case tok_toupper: + mapidx = 0; + goto read_mapping; + + case tok_tolower: + mapidx = 1; + goto read_mapping; + + case tok_map: + /* We simply forget the `map' keyword and use the following + operand to determine the mapping. 
*/ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_ident || now->tok == tok_string) + { + size_t cnt; - if (idx < 256) - return table == NULL ? NULL : &(*table)[idx]; + for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt) + if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) + break; - for (cnt = 256; cnt < ctype->charnames_act; ++cnt) - if (ctype->charnames[cnt] == idx) - break; + if (cnt < ctype->map_collection_nr) + mapidx = cnt; + else + { + lr_error (ldfile, _("unknown map `%s'"), + now->val.str.startmb); + lr_ignore_rest (ldfile, 0); + break; + } + } + else if (now->tok < tok_toupper || now->tok > tok_tolower) + goto err_label; + else + mapidx = now->tok - tok_toupper; - /* We have to distinguish two cases: the name is found or not. */ - if (cnt == ctype->charnames_act) - { - /* Extend the name array. */ - if (ctype->charnames_act == ctype->charnames_max) - { - ctype->charnames_max *= 2; - ctype->charnames = (unsigned int *) - xrealloc (ctype->charnames, - sizeof (unsigned int) * ctype->charnames_max); - } - ctype->charnames[ctype->charnames_act++] = idx; - } + now = lr_token (ldfile, charmap, NULL); + /* This better should be a semicolon. */ + if (now->tok != tok_semicolon) + goto err_label; - if (table == NULL) - /* We have done everything we are asked to do. */ - return NULL; + read_mapping: + /* Test whether this mapping was already defined. */ + if (ctype->tomap_done[mapidx]) + { + lr_error (ldfile, _("duplicated definition for mapping `%s'"), + ctype->mapnames[mapidx]); + lr_ignore_rest (ldfile, 0); + break; + } + ctype->tomap_done[mapidx] = 1; - if (cnt >= *act) - { - if (cnt >= *max) - { - size_t old_max = *max; - do - *max *= 2; - while (*max <= cnt); + now = lr_token (ldfile, charmap, NULL); + while (now->tok != tok_eol && now->tok != tok_eof) + { + struct charseq *from_seq; + uint32_t from_wch; + struct charseq *to_seq; + uint32_t to_wch; + + /* Every pair starts with an opening brace. 
*/ + if (now->tok != tok_open_brace) + goto err_label; + + /* Next comes the from-value. */ + now = lr_token (ldfile, charmap, NULL); + if (get_character (now, charmap, repertoire, &from_seq, + &from_wch) != 0) + goto err_label; + + /* The next is a comma. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_comma) + goto err_label; + + /* And the other value. */ + now = lr_token (ldfile, charmap, NULL); + if (get_character (now, charmap, repertoire, &to_seq, + &to_wch) != 0) + goto err_label; + + /* And the last thing is the closing brace. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_close_brace) + goto err_label; + + if (!ignore_content) + { + if (mapidx < 2 && from_seq != NULL && to_seq != NULL + && from_seq->nbytes == 1 && to_seq->nbytes == 1) + /* We can use this value. */ + ctype->map256_collection[mapidx][from_seq->bytes[0]] + = to_seq->bytes[0]; + + if (from_wch != ILLEGAL_CHAR_VALUE + && to_wch != ILLEGAL_CHAR_VALUE) + /* Both correct values. */ + *find_idx (ctype, &ctype->map_collection[mapidx], + &ctype->map_collection_max[mapidx], + &ctype->map_collection_act[mapidx], + from_wch) = to_wch; + } + + /* Now comes a semicolon or the end of the line/file. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_semicolon) + now = lr_token (ldfile, charmap, NULL); + } + break; - *table = - (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int)); - memset (&(*table)[old_max], '\0', - (*max - old_max) * sizeof (u_int32_t)); + case tok_translit_start: + /* The rest of the line better should be empty. */ + lr_ignore_rest (ldfile, 1); + + /* We count here the number of allocated entries in the `translit' + array. */ + cnt = 0; + + /* We proceed until we see the `translit_end' token. */ + while (now = lr_token (ldfile, charmap, repertoire), + now->tok != tok_translit_end && now->tok != tok_eof) + { + if (now->tok == tok_eol) + /* Ignore empty lines. 
*/ + continue; + + if (now->tok == tok_translit_end) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_include) + { + /* We have to include locale. */ + const char *locale_name; + const char *repertoire_name; + + now = lr_token (ldfile, charmap, NULL); + /* This should be a string or an identifier. In any + case something to name a locale. */ + if (now->tok != tok_string && now->tok != tok_ident) + { + translit_syntax: + lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE"); + lr_ignore_rest (ldfile, 0); + continue; + } + locale_name = now->val.str.startmb; + + /* Next should be a semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + goto translit_syntax; + + /* Now the repertoire name. */ + now = lr_token (ldfile, charmap, NULL); + if ((now->tok != tok_string && now->tok != tok_ident) + || now->val.str.startmb == NULL) + goto translit_syntax; + repertoire_name = now->val.str.startmb; + + /* We must not have more than one `include'. */ + if (ctype->translit_copy_locale != NULL) + { + lr_error (ldfile, _("\ +%s: only one `include' instruction allowed"), "LC_CTYPE"); + lr_ignore_rest (ldfile, 0); + continue; + } + + ctype->translit_copy_locale = locale_name; + ctype->translit_copy_repertoire = repertoire_name; + + /* The rest of the line must be empty. */ + lr_ignore_rest (ldfile, 1); + continue; + } + + read_translit_entry (ldfile, ctype, now, charmap, repertoire); + } + break; + + case tok_ident: + /* This could mean one of several things. First test whether + it's a character class name. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0) + break; + if (cnt < ctype->nr_charclass) + { + class_bit = _ISwbit (cnt); + class256_bit = cnt <= 11 ? 
_ISbit (cnt) : 0; + free (now->val.str.startmb); + goto read_charclass; + } + if (strcmp (now->val.str.startmb, "special1") == 0) + { + class_bit = _ISwspecial1; + free (now->val.str.startmb); + goto read_charclass; + } + if (strcmp (now->val.str.startmb, "special2") == 0) + { + class_bit = _ISwspecial2; + free (now->val.str.startmb); + goto read_charclass; + } + if (strcmp (now->val.str.startmb, "special3") == 0) + { + class_bit = _ISwspecial3; + free (now->val.str.startmb); + goto read_charclass; + } + if (strcmp (now->val.str.startmb, "tosymmetric") == 0) + { + mapidx = 2; + goto read_mapping; + } + break; + + case tok_end: + /* Next we assume `LC_CTYPE'. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_CTYPE"); + else if (now->tok != tok_lc_ctype) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); + lr_ignore_rest (ldfile, now->tok == tok_lc_ctype); + return; + + default: + err_label: + if (now->tok != tok_eof) + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); } - (*table)[cnt] = 0; - *act = cnt; + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } - return &(*table)[cnt]; + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); } static void -set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset) +set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap, + struct repertoire_t *repertoire) { + size_t cnt; + /* These function defines the default values for the classes and conversions according to POSIX.2 2.5.2.1. It may seem that the order of these if-blocks is arbitrary but it is NOT. Don't move them unless you know what you do! 
*/ - void set_default (int bit, int from, int to) + void set_default (int bitpos, int from, int to) { char tmp[2]; int ch; + int bit = _ISbit (bitpos); + int bitw = _ISwbit (bitpos); /* Define string. */ strcpy (tmp, "?"); for (ch = from; ch <= to; ++ch) { - unsigned int value; + uint32_t value; + struct charseq *seq; tmp[0] = ch; - value = charset_find_value (&charset->char_table, tmp, 1); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + value = repertoire_find_value (repertoire, tmp, 1); + if (value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - tmp); - continue; +%s: character `%s' not defined in repertoire while needed as default value"), + "LC_CTYPE", tmp); } else - ELEM (ctype, class_collection, , value) |= bit; + ELEM (ctype, class_collection, , value) |= bitw; + + seq = charmap_find_value (charmap, tmp, 1); + if (seq == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined in charmap while needed as default value"), + "LC_CTYPE", tmp); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", tmp); + else + ctype->class256_collection[seq->bytes[0]] |= bit; } } /* Set default values if keyword was not present. */ - if ((ctype->class_done & BIT (tok_upper)) == 0) + if ((ctype->class_done & BITw (tok_upper)) == 0) /* "If this keyword [lower] is not specified, the lowercase letters `A' through `Z', ..., shall automatically belong to this class, with implementation defined character values." [P1003.2, 2.5.2.1] */ - set_default (BIT (tok_upper), 'A', 'Z'); + set_default (BITPOS (tok_upper), 'A', 'Z'); - if ((ctype->class_done & BIT (tok_lower)) == 0) + if ((ctype->class_done & BITw (tok_lower)) == 0) /* "If this keyword [lower] is not specified, the lowercase letters `a' through `z', ..., shall automatically belong to this class, with implementation defined character values." 
[P1003.2, 2.5.2.1] */ - set_default (BIT (tok_lower), 'a', 'z'); + set_default (BITPOS (tok_lower), 'a', 'z'); - if ((ctype->class_done & BIT (tok_alpha)) == 0) + if ((ctype->class_done & BITw (tok_alpha)) == 0) { /* Table 2-6 in P1003.2 says that characters in class `upper' or class `lower' *must* be in class `alpha'. */ unsigned long int mask = BIT (tok_upper) | BIT (tok_lower); - size_t cnt; for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_alpha); } - if ((ctype->class_done & BIT (tok_digit)) == 0) + if ((ctype->class_done & BITw (tok_digit)) == 0) /* "If this keyword [digit] is not specified, the digits `0' through `9', ..., shall automatically belong to this class, with implementation-defined character values." [P1003.2, 2.5.2.1] */ - set_default (BIT (tok_digit), '0', '9'); + set_default (BITPOS (tok_digit), '0', '9'); /* "Only characters specified for the `alpha' and `digit' keyword shall be specified. Characters specified for the keyword `alpha' and `digit' are automatically included in this class. */ { unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit); - size_t cnt; for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_alnum); } - if ((ctype->class_done & BIT (tok_space)) == 0) + if ((ctype->class_done & BITw (tok_space)) == 0) /* "If this keyword [space] is not specified, the characters <space>, <form-feed>, <newline>, <carriage-return>, <tab>, and <vertical-tab>, ..., shall automatically belong to this class, with implementation-defined character values." 
[P1003.2, 2.5.2.1] */ { - unsigned int value; + uint32_t value; + struct charseq *seq; - value = charset_find_value (&charset->char_table, "space", 5); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + value = repertoire_find_value (repertoire, "space", 5); + if (value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - "<space>"); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>"); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "form-feed", 9); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - "<form-feed>"); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "form-feed", 9); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<form-feed>"); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "newline", 7); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "form-feed", 9); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - "<newline>"); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<form-feed>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", 
"<form-feed>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "newline", 7); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<newline>"); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "carriage-return", 15); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "newline", 7); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ character `%s' not defined while needed as default value"), - "<carriage-return>"); + "<newline>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<newline>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "carriage-return", 15); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<carriage-return>"); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "tab", 3); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "carriage-return", 15); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - "<tab>"); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<carriage-return>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<carriage-return>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "tab", 3); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 
0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<tab>"); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "vertical-tab", 12); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "tab", 3); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - "<vertical-tab>"); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<tab>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<tab>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "vertical-tab", 12); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<vertical-tab>"); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); + + seq = charmap_find_value (charmap, "vertical-tab", 12); + if (seq == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<vertical-tab>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<vertical-tab>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); } - if ((ctype->class_done & BIT (tok_xdigit)) == 0) + if ((ctype->class_done & BITw (tok_xdigit)) == 0) /* "If this keyword is not specified, the digits `0' to `9', the uppercase letters `A' through `F', and the lowercase letters `a' through `f', ..., shell automatically belong to this class, with implementation defined character values." 
[P1003.2, 2.5.2.1] */ { - set_default (BIT (tok_xdigit), '0', '9'); - set_default (BIT (tok_xdigit), 'A', 'F'); - set_default (BIT (tok_xdigit), 'a', 'f'); + set_default (BITPOS (tok_xdigit), '0', '9'); + set_default (BITPOS (tok_xdigit), 'A', 'F'); + set_default (BITPOS (tok_xdigit), 'a', 'f'); } - if ((ctype->class_done & BIT (tok_blank)) == 0) + if ((ctype->class_done & BITw (tok_blank)) == 0) /* "If this keyword [blank] is unspecified, the characters <space> and <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */ { - unsigned int value; + uint32_t value; + struct charseq *seq; - value = charset_find_value (&charset->char_table, "space", 5); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + value = repertoire_find_value (repertoire, "space", 5); + if (value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - "<space>"); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>"); } else ELEM (ctype, class_collection, , value) |= BIT (tok_blank); - value = charset_find_value (&charset->char_table, "tab", 3); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - "<tab>"); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); + + + value = repertoire_find_value (repertoire, "tab", 3); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<tab>"); } else ELEM (ctype, class_collection, , value) |= BIT (tok_blank); + + seq = charmap_find_value (charmap, 
"tab", 3); + if (seq == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<tab>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<tab>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); } - if ((ctype->class_done & BIT (tok_graph)) == 0) + if ((ctype->class_done & BITw (tok_graph)) == 0) /* "If this keyword [graph] is not specified, characters specified for the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct', shall belong to this character class." [P1003.2, 2.5.2.1] */ @@ -1142,9 +2603,13 @@ character `%s' not defined while needed as default value"), for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_graph); + + for (cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_graph); } - if ((ctype->class_done & BIT (tok_print)) == 0) + if ((ctype->class_done & BITw (tok_print)) == 0) /* "If this keyword [print] is not provided, characters specified for the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct', and the <space> character shall belong to this character class." 
@@ -1153,25 +2618,46 @@ character `%s' not defined while needed as default value"), unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); size_t cnt; - wchar_t space; + uint32_t space; + struct charseq *seq; for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_print); - space = charset_find_value (&charset->char_table, "space", 5); + for (cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_print); + + + space = repertoire_find_value (repertoire, "space", 5); if (space == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - "<space>"); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>"); } else ELEM (ctype, class_collection, , space) |= BIT (tok_print); + + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>"); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>"); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print); } - if (ctype->toupper_done == 0) + if (ctype->tomap_done[0] == 0) /* "If this keyword [toupper] is not specified, the lowercase letters `a' through `z', and their corresponding uppercase letters `A' to `Z', ..., shall automatically be included, with implementation- @@ -1184,55 +2670,133 @@ character `%s' not defined while needed as default value"), for (ch = 'a'; ch <= 'z'; ++ch) { - unsigned int value_from, value_to; + uint32_t value_from, value_to; + struct charseq *seq_from, *seq_to; tmp[1] = (char) ch; - value_from = charset_find_value (&charset->char_table, 
&tmp[1], 1); - if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE) + value_from = repertoire_find_value (repertoire, &tmp[1], 1); + if (value_from == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - tmp); - continue; +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp); + } + else + { + /* This conversion is implementation defined. */ + tmp[1] = (char) (ch + ('A' - 'a')); + value_to = repertoire_find_value (repertoire, &tmp[1], 1); + if (value_to == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp); + } + else + /* The index [0] is determined by the order of the + `ctype_map_newP' calls in `ctype_startup'. */ + ELEM (ctype, map_collection, [0], value_from) = value_to; } - /* This conversion is implementation defined. */ - tmp[1] = (char) (ch + ('A' - 'a')); - value_to = charset_find_value (&charset->char_table, &tmp[1], 1); - if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE) + seq_from = charmap_find_value (charmap, &tmp[1], 1); + if (seq_from == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - tmp); - continue; +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp); + } + else if (seq_from->nbytes != 1) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' needed as default value not representable with one byte"), + "LC_CTYPE", tmp); + } + else + { + /* This conversion is implementation defined. 
*/ + tmp[1] = (char) (ch + ('A' - 'a')); + seq_to = charmap_find_value (charmap, &tmp[1], 1); + if (seq_to == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp); + } + else if (seq_to->nbytes != 1) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' needed as default value not representable with one byte"), + "LC_CTYPE", tmp); + } + else + /* The index [0] is determined by the order of the + `ctype_map_newP' calls in `ctype_startup'. */ + ctype->map256_collection[0][seq_from->bytes[0]] + = seq_to->bytes[0]; } - - /* The index [0] is determined by the order of the - `ctype_map_newP' calls in `ctype_startup'. */ - ELEM (ctype, map_collection, [0], value_from) = value_to; } } - if (ctype->tolower_done == 0) + if (ctype->tomap_done[1] == 0) /* "If this keyword [tolower] is not specified, the mapping shall be the reverse mapping of the one specified to `toupper'." [P1003.2] */ { - size_t cnt; - for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt) if (ctype->map_collection[0][cnt] != 0) ELEM (ctype, map_collection, [1], ctype->map_collection[0][cnt]) = ctype->charnames[cnt]; + + for (cnt = 0; cnt < 256; ++cnt) + if (ctype->map256_collection[0][cnt] != 0) + ctype->map_collection[1][ctype->map_collection[0][cnt]] + = ctype->charnames[cnt]; + } + + if (ctype->outdigits_act == 0) + { + for (cnt = 0; cnt < 10; ++cnt) + { + ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, + digits + cnt, 1); + + if (ctype->mboutdigits[cnt] == NULL) + { + ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, + longnames[cnt], + strlen (longnames[cnt])); + + if (ctype->mboutdigits[cnt] == NULL) + { + /* Provide a replacement. */ + error (0, 0, _("\ +no output digits defined and none of the standard names in the charmap")); + + ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool, + sizeof (struct charseq) + 1); + + /* This is better than nothing. 
*/ + ctype->mboutdigits[cnt]->bytes[0] = digits[cnt]; + ctype->mboutdigits[cnt]->nbytes = 1; + } + } + } + + ctype->outdigits_act = 10; } } static void -allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset) +allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap, + struct repertoire_t *repertoire) { size_t idx; @@ -1300,12 +2864,12 @@ Computing table size for character classes might take a while..."), # define NAMES_B2 ctype->names_el #endif - ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size - * ctype->plane_cnt, - sizeof (u_int32_t)); - ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size - * ctype->plane_cnt, - sizeof (u_int32_t)); + ctype->names_eb = (uint32_t *) xcalloc (ctype->plane_size + * ctype->plane_cnt, + sizeof (uint32_t)); + ctype->names_el = (uint32_t *) xcalloc (ctype->plane_size + * ctype->plane_cnt, + sizeof (uint32_t)); for (idx = 1; idx < 256; ++idx) NAMES_B1[idx] = idx; @@ -1330,7 +2894,7 @@ Computing table size for character classes might take a while..."), NAMES_B1[0] = 0; for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx) - NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]); + NAMES_B2[idx] = bswap_32 (NAMES_B1[idx]); /* You wonder about this amount of memory? This is only because some @@ -1353,10 +2917,9 @@ Computing table size for character classes might take a while..."), # define TRANS32(w) (w) #endif - for (idx = 0; idx < ctype->class_collection_act; ++idx) - if (ctype->charnames[idx] < 256) - ctype->ctype_b[128 + ctype->charnames[idx]] - = TRANS (ctype->class_collection[idx]); + /* This is the array accessed usig the multibyte string elements. */ + for (idx = 0; idx < 256; ++idx) + ctype->ctype_b[128 + idx] = TRANS (ctype->class256_collection[idx]); /* Mirror first 127 entries. We must take care that entry -1 is not mirrored because EOF == -1. 
*/ @@ -1369,10 +2932,10 @@ Computing table size for character classes might take a while..."), = TRANS32 (ctype->class_collection[idx]); /* Room for table of mappings. */ - ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr - * sizeof (u_int32_t *)); - ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr - * sizeof (u_int32_t *)); + ctype->map_eb = (uint32_t **) xmalloc (ctype->map_collection_nr + * sizeof (uint32_t *)); + ctype->map_el = (uint32_t **) xmalloc (ctype->map_collection_nr + * sizeof (uint32_t *)); /* Fill in all mappings. */ for (idx = 0; idx < ctype->map_collection_nr; ++idx) @@ -1380,12 +2943,12 @@ Computing table size for character classes might take a while..."), unsigned int idx2; /* Allocate table. */ - ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size - * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); - ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size - * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + ctype->map_eb[idx] = (uint32_t *) xmalloc ((ctype->plane_size + * ctype->plane_cnt + 128) + * sizeof (uint32_t)); + ctype->map_el[idx] = (uint32_t *) xmalloc ((ctype->plane_size + * ctype->plane_cnt + 128) + * sizeof (uint32_t)); #if __BYTE_ORDER == __LITTLE_ENDIAN # define MAP_B1 ctype->map_el @@ -1397,13 +2960,11 @@ Computing table size for character classes might take a while..."), /* Copy default value (identity mapping). */ memcpy (&MAP_B1[idx][128], NAMES_B1, - ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t)); + ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t)); /* Copy values from collection. */ - for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2) - if (ctype->map_collection[idx][idx2] != 0) - MAP_B1[idx][128 + ctype->charnames[idx2]] = - ctype->map_collection[idx][idx2]; + for (idx2 = 0; idx2 < 256; ++idx2) + MAP_B1[idx][128 + idx2] = ctype->map256_collection[idx][idx2]; /* Mirror first 127 entries. 
We must take care not to map entry -1 because EOF == -1. */ @@ -1415,14 +2976,14 @@ Computing table size for character classes might take a while..."), /* And now the other byte order. */ for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2) - MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]); + MAP_B2[idx][idx2] = bswap_32 (MAP_B1[idx][idx2]); } /* Extra array for class and map names. */ - ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass - * sizeof (u_int32_t)); - ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr - * sizeof (u_int32_t)); + ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass + * sizeof (uint32_t)); + ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr + * sizeof (uint32_t)); /* Array for width information. Because the expected width are very small we use only one single byte. This save space and we need @@ -1430,16 +2991,17 @@ Computing table size for character classes might take a while..."), ctype->width = (unsigned char *) xmalloc (ctype->plane_size * ctype->plane_cnt); /* Initialize with default width value. 
*/ - memset (ctype->width, charset->width_default, + memset (ctype->width, charmap->width_default, ctype->plane_size * ctype->plane_cnt); - if (charset->width_rules != NULL) + if (charmap->width_rules != NULL) { +#if 0 size_t cnt; - for (cnt = 0; cnt < charset->nwidth_rules; ++cnt) - if (charset->width_rules[cnt].width != charset->width_default) - for (idx = charset->width_rules[cnt].from; - idx <= charset->width_rules[cnt].to; ++idx) + for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) + if (charmap->width_rules[cnt].width != charmap->width_default) + for (idx = charmap->width_rules[cnt].from; + idx <= charmap->width_rules[cnt].to; ++idx) { size_t nr = idx % ctype->plane_size; size_t depth = 0; @@ -1449,15 +3011,229 @@ Computing table size for character classes might take a while..."), assert (depth < ctype->plane_cnt); ctype->width[nr + depth * ctype->plane_size] - = charset->width_rules[cnt].width; + = charmap->width_rules[cnt].width; } +#else + abort (); +#endif } - /* Compute MB_CUR_MAX. */ - ctype->mb_cur_max = charset->mb_cur_max; + /* Set MB_CUR_MAX. */ + ctype->mb_cur_max = charmap->mb_cur_max; /* We need the name of the currently used 8-bit character set to make correct conversion between this 8-bit representation and the ISO 10646 character set used internally for wide characters. */ - ctype->codeset_name = charset->code_set_name ? : ""; + ctype->codeset_name = charmap->code_set_name; + + /* Now determine the table for the transliteration information. + + XXX It is not yet clear to me whether it is worth implementing a + complicated algorithm which uses a hash table to locate the entries. + For now I'll use a simple array which can be searching using binary + search. */ + if (ctype->translit_copy_locale != NULL) + { + /* Fold in the transliteration information from the locale mentioned + in the `include' statement. 
*/ + struct locale_ctype_t *here = ctype; + + do + { + struct localedef_t *other = find_locale (LC_CTYPE, + here->translit_copy_locale, + repertoire->name, charmap); + + if (other == NULL) + { + error (0, 0, _("\ +%s: transliteration data from locale `%s' not available"), + "LC_CTYPE", here->translit_copy_locale); + break; + } + + here = other->categories[LC_CTYPE].ctype; + + /* Enqueue the information if necessary. */ + if (here->translit != NULL) + { + struct translit_t *endp = here->translit; + while (endp->next != NULL) + endp = endp->next; + + endp->next = ctype->translit; + ctype->translit = here->translit; + } + } + while (here->translit_copy_locale != NULL); + } + + if (ctype->translit != NULL) + { + /* First count how many entries we have. This is the upper limit + since some entries from the included files might be overwritten. */ + size_t number = 0; + size_t cnt; + struct translit_t *runp = ctype->translit; + struct translit_t **sorted; + size_t from_len, to_len; + + while (runp != NULL) + { + ++number; + runp = runp->next; + } + + /* Next we allocate an array large enough and fill in the values. */ + sorted = alloca (number * sizeof (struct translit_t **)); + runp = ctype->translit; + number = 0; + do + { + /* Search for the place where to insert this string. + XXX Better use a real sorting algorithm later. */ + size_t idx = 0; + int replace = 0; + + while (idx < number) + { + int res = wcscmp ((const wchar_t *) sorted[idx]->from, + (const wchar_t *) runp->from); + if (res == 0) + { + replace = 1; + break; + } + if (res > 0) + break; + ++idx; + } + + if (replace) + sorted[idx] = runp; + else + { + memmove (&sorted[idx + 1], &sorted[idx], + (number - idx) * sizeof (struct translit_t *)); + sorted[idx] = runp; + ++number; + } + + runp = runp->next; + } + while (runp != NULL); + + /* The next step is putting all the possible transliteration + strings in one memory block so that we can write it out. 
+ We need several different blocks: + - index to the tfromstring array + - from-string array + - index to the to-string array + - to-string array. + And this all must be available for both endianes variants. + */ + from_len = to_len = 0; + for (cnt = 0; cnt < number; ++cnt) + { + struct translit_to_t *srunp; + from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1; + srunp = sorted[cnt]->to; + while (srunp != NULL) + { + to_len += wcslen ((const wchar_t *) srunp->str) + 1; + srunp = srunp->next; + } + /* Plus one for the extra NUL character marking the end of + the list for the current entry. */ + ++to_len; + } + + /* We can allocate the arrays for the results. */ +#if BYTE_ORDER == LITTLE_ENDIAN +# define from_idx translit_from_idx_el +# define from_tbl translit_from_tbl_el +# define to_idx translit_to_idx_el +# define to_tbl translit_to_tbl_el +# define from_idx_ob translit_from_idx_eb +# define from_tbl_ob translit_from_tbl_eb +# define to_idx_ob translit_to_idx_eb +# define to_tbl_ob translit_to_tbl_eb +#else +# define from_idx translit_from_idx_eb +# define from_tbl translit_from_tbl_eb +# define to_idx translit_to_idx_eb +# define to_tbl translit_to_tbl_eb +# define from_idx_ob translit_from_idx_el +# define from_tbl_ob translit_from_tbl_el +# define to_idx_ob translit_to_idx_el +# define to_tbl_ob translit_to_tbl_el +#endif + ctype->from_idx = xmalloc (number * sizeof (uint32_t)); + ctype->from_idx_ob = xmalloc (number * sizeof (uint32_t)); + ctype->from_tbl = xmalloc (from_len * sizeof (uint32_t)); + ctype->from_tbl_ob = xmalloc (from_len * sizeof (uint32_t)); + ctype->to_idx = xmalloc (number * sizeof (uint32_t)); + ctype->to_idx_ob = xmalloc (number * sizeof (uint32_t)); + ctype->to_tbl = xmalloc (to_len * sizeof (uint32_t)); + ctype->to_tbl_ob = xmalloc (to_len * sizeof (uint32_t)); + + from_len = 0; + to_len = 0; + for (cnt = 0; cnt < number; ++cnt) + { + size_t len; + struct translit_to_t *srunp; + + ctype->from_idx[cnt] = from_len; + 
ctype->to_idx[cnt] = to_len; + + len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1; + wmemcpy ((wchar_t *) &ctype->from_tbl[from_len], + (const wchar_t *) sorted[cnt]->from, len); + from_len += len; + + ctype->to_idx[cnt] = to_len; + srunp = sorted[cnt]->to; + while (srunp != NULL) + { + len = wcslen ((const wchar_t *) srunp->str) + 1; + wmemcpy ((wchar_t *) &ctype->to_tbl[to_len], + (const wchar_t *) srunp->str, len); + to_len += len; + srunp = srunp->next; + } + ctype->to_tbl[to_len++] = L'\0'; + } + + /* Now create the tables for the other endianess. */ + for (cnt = 0; cnt < number; ++cnt) + { + ctype->from_idx_ob[cnt] = bswap_32 (ctype->from_idx[cnt]); + ctype->to_idx_ob[cnt] = bswap_32 (ctype->to_idx[cnt]); + } + for (cnt = 0; cnt < from_len; ++cnt) + ctype->from_tbl[cnt] = bswap_32 (ctype->from_tbl_ob[cnt]); + for (cnt = 0; cnt < to_len; ++cnt) + ctype->to_tbl[cnt] = bswap_32 (ctype->to_tbl_ob[cnt]); + + /* Store the information about the length. */ + ctype->translit_idx_size = number * sizeof (uint32_t); + ctype->translit_from_tbl_size = from_len * sizeof (uint32_t); + ctype->translit_to_tbl_size = to_len * sizeof (uint32_t); + } + else + { + /* Provide some dummy pointers since we have nothing to write out. */ + static uint32_t no_str = { 0 }; + + ctype->translit_from_idx_el = &no_str; + ctype->translit_from_idx_eb = &no_str; + ctype->translit_from_tbl_el = &no_str; + ctype->translit_from_tbl_eb = &no_str; + ctype->translit_to_tbl_el = &no_str; + ctype->translit_to_tbl_eb = &no_str; + ctype->translit_idx_size = 0; + ctype->translit_from_tbl_size = 0; + ctype->translit_to_tbl_size = 0; + } } diff --git a/locale/programs/ld-identification.c b/locale/programs/ld-identification.c new file mode 100644 index 0000000..79bcd44 --- /dev/null +++ b/locale/programs/ld-identification.c @@ -0,0 +1,376 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <stdlib.h> +#include <string.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_IDENTIFICATION locale. 
*/ +struct locale_identification_t +{ + const char *title; + const char *source; + const char *address; + const char *contact; + const char *email; + const char *tel; + const char *fax; + const char *language; + const char *territory; + const char *audience; + const char *application; + const char *abbreviation; + const char *revision; + const char *date; + const char *category[__LC_LAST]; +}; + + +static void +identification_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + { + locale->categories[LC_IDENTIFICATION].identification = + (struct locale_identification_t *) + xcalloc (1, sizeof (struct locale_identification_t)); + + locale->categories[LC_IDENTIFICATION].identification->category[LC_ALL] = + ""; + } + + lr->translate_strings = 1; + lr->return_widestr = 0; +} + + +void +identification_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ + struct locale_identification_t *identification + = locale->categories[LC_IDENTIFICATION].identification; + +#define TEST_ELEM(cat) \ + if (identification->cat == NULL) \ + { \ + if (verbose) \ + error (0, 0, _("%s: field `%s' not defined"), \ + "LC_IDENTIFICATION", #cat); \ + identification->cat = ""; \ + } + + TEST_ELEM (title); + TEST_ELEM (source); + TEST_ELEM (address); + TEST_ELEM (contact); + TEST_ELEM (email); + TEST_ELEM (tel); + TEST_ELEM (fax); + TEST_ELEM (language); + TEST_ELEM (territory); + TEST_ELEM (audience); + TEST_ELEM (application); + TEST_ELEM (abbreviation); + TEST_ELEM (revision); + TEST_ELEM (date); +} + + +void +identification_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ + struct locale_identification_t *identification + = locale->categories[LC_IDENTIFICATION].identification; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_IDENTIFICATION) + + (__LC_LAST - 1)]; + struct locale_file data; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_IDENTIFICATION)]; + size_t cnt = 0; + size_t num; 
+ + data.magic = LIMAGIC (LC_IDENTIFICATION); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_IDENTIFICATION); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = (void *) identification->title; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->source; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->address; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->contact; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->email; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->tel; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->fax; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->language; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->territory; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->audience; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + 
idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->application; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->abbreviation; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->revision; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) identification->date; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + for (num = 0; num < __LC_LAST; ++num) + { + iov[cnt].iov_base = (void *) identification->category[num]; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + } + + assert (cnt == (2 + _NL_ITEM_INDEX (_NL_NUM_LC_IDENTIFICATION) + + (__LC_LAST - 1))); + + write_locale_data (output_path, "LC_IDENTIFICATION", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_IDENTIFICATION), iov); +} + + +/* The parser for the LC_IDENTIFICATION section of the locale definition. */ +void +identification_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_identification_t *identification; + struct token *now; + struct token *arg; + struct token *cattok; + int category; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_IDENTIFICATION' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. 
*/ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_identification, + LC_IDENTIFICATION, "LC_IDENTIFICATION", ignore_content); + return; + } + + /* Prepare the data structures. */ + identification_startup (ldfile, result, ignore_content); + identification = result->categories[LC_IDENTIFICATION].identification; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (identification->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_IDENTIFICATION", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_IDENTIFICATION", #cat); \ + identification->cat = ""; \ + } \ + else if (!ignore_content) \ + identification->cat = arg->val.str.startmb; \ + break + + STR_ELEM (title); + STR_ELEM (source); + STR_ELEM (address); + STR_ELEM (contact); + STR_ELEM (email); + STR_ELEM (tel); + STR_ELEM (fax); + STR_ELEM (language); + STR_ELEM (territory); + STR_ELEM (audience); + STR_ELEM (application); + STR_ELEM (abbreviation); + STR_ELEM (revision); + STR_ELEM (date); + + case tok_category: + /* We expect two operands. */ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok != tok_string && arg->tok != tok_ident) + goto err_label; + /* Next is a semicolon. */ + cattok = lr_token (ldfile, charmap, NULL); + if (cattok->tok != tok_semicolon) + goto err_label; + /* Now a LC_xxx identifier. 
*/ + cattok = lr_token (ldfile, charmap, NULL); + switch (cattok->tok) + { +#define CATEGORY(lname, uname) \ + case tok_lc_##lname: \ + category = LC_##uname; \ + break + + CATEGORY (identification, IDENTIFICATION); + CATEGORY (ctype, CTYPE); + CATEGORY (collate, COLLATE); + CATEGORY (time, TIME); + CATEGORY (numeric, NUMERIC); + CATEGORY (monetary, MONETARY); + CATEGORY (messages, MESSAGES); + CATEGORY (paper, PAPER); + CATEGORY (name, NAME); + CATEGORY (address, ADDRESS); + CATEGORY (telephone, TELEPHONE); + CATEGORY (measurement, MEASUREMENT); + + default: + goto err_label; + } + if (identification->category[category] != NULL) + { + lr_error (ldfile, _("\ +%s: duplicate category version definition"), "LC_IDENTIFICATION"); + free (arg->val.str.startmb); + } + else + identification->category[category] = arg->val.str.startmb; + break; + + case tok_end: + /* Next we assume `LC_IDENTIFICATION'. */ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_IDENTIFICATION"); + else if (arg->tok != tok_lc_identification) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_IDENTIFICATION"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_identification); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_IDENTIFICATION"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_IDENTIFICATION"); +} diff --git a/locale/programs/ld-measurement.c b/locale/programs/ld-measurement.c new file mode 100644 index 0000000..38a6160 --- /dev/null +++ b/locale/programs/ld-measurement.c @@ -0,0 +1,206 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_MEASUREMENT locale. */ +struct locale_measurement_t +{ + unsigned char measurement; +}; + + +static void +measurement_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_MEASUREMENT].measurement = + (struct locale_measurement_t *) + xcalloc (1, sizeof (struct locale_measurement_t)); + + lr->translate_strings = 1; + lr->return_widestr = 0; +} + + +void +measurement_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ + struct locale_measurement_t *measurement = + locale->categories[LC_MEASUREMENT].measurement; + + if (measurement->measurement == 0) + { + error (0, 0, _("%s: field `%s' not defined"), + "LC_MEASUREMENT", "measurement"); + /* Use as the default value the value of the i18n locale. 
*/ + measurement->measurement = 1; + } + else + { + if (measurement->measurement > 3) + error (0, 0, _("%s: invalid value for field `%s'"), + "LC_MEASUREMENT", "meassurement"); + } +} + + +void +measurement_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ + struct locale_measurement_t *measurement = + locale->categories[LC_MEASUREMENT].measurement; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_MEASUREMENT)]; + struct locale_file data; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MEASUREMENT)]; + size_t cnt = 0; + + data.magic = LIMAGIC (LC_MEASUREMENT); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_MEASUREMENT); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = &measurement->measurement; + iov[cnt].iov_len = 1; + ++cnt; + + assert (cnt == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MEASUREMENT)); + + write_locale_data (output_path, "LC_MEASUREMENT", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MEASUREMENT), iov); +} + + +/* The parser for the LC_MEASUREMENT section of the locale definition. */ +void +measurement_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_measurement_t *measurement; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_MEASUREMENT' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. 
*/ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_measurement, + LC_MEASUREMENT, "LC_MEASUREMENT", ignore_content); + return; + } + + /* Prepare the data structures. */ + measurement_startup (ldfile, result, ignore_content); + measurement = result->categories[LC_MEASUREMENT].measurement; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define INT_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_number) \ + goto err_label; \ + else if (measurement->cat != 0) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_MEASUREMENT", #cat); \ + else if (!ignore_content) \ + measurement->cat = arg->val.num; \ + break + + INT_ELEM (measurement); + + case tok_end: + /* Next we assume `LC_MEASUREMENT'. */ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_MEASUREMENT"); + else if (arg->tok != tok_lc_measurement) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_MEASUREMENT"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_measurement); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_MEASUREMENT"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. 
*/ + lr_error (ldfile, _("%s: premature end of file"), + "LC_MEASUREMENT"); +} diff --git a/locale/programs/ld-messages.c b/locale/programs/ld-messages.c index 69e411c..33b7735 100644 --- a/locale/programs/ld-messages.c +++ b/locale/programs/ld-messages.c @@ -1,6 +1,6 @@ -/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -21,28 +21,18 @@ # include <config.h> #endif -#include <alloca.h> #include <langinfo.h> +#include <sys/types.h> +#include <regex.h> #include <string.h> -#include <libintl.h> #include <sys/uio.h> -#ifdef HAVE_REGEX -# include <regex.h> -#else -# include <rx.h> -#endif - -/* Undefine following line in production version. */ -/* #define NDEBUG 1 */ #include <assert.h> -#include "locales.h" -#include "stringtrans.h" +#include "linereader.h" +#include "localedef.h" #include "localeinfo.h" - - -extern void *xmalloc (size_t __n); +#include "locfile.h" /* The real definition of the struct for the LC_MESSAGES locale. */ @@ -55,44 +45,51 @@ struct locale_messages_t }; -void +static void messages_startup (struct linereader *lr, struct localedef_t *locale, - struct charset_t *charset) + int ignore_content) { - struct locale_messages_t *messages; + if (!ignore_content) + locale->categories[LC_MESSAGES].messages = + (struct locale_messages_t *) xcalloc (1, + sizeof (struct locale_messages_t)); - /* We have a definition for LC_MESSAGES. */ - copy_posix.mask &= ~(1 << LC_MESSAGES); - - /* It is important that we always use UCS1 encoding for strings now. 
*/ - encoding_method = ENC_UCS1; - - locale->categories[LC_MESSAGES].messages = messages = - (struct locale_messages_t *) xmalloc (sizeof (struct locale_messages_t)); - - memset (messages, '\0', sizeof (struct locale_messages_t)); + lr->translate_strings = 1; + lr->return_widestr = 0; } void -messages_finish (struct localedef_t *locale) +messages_finish (struct localedef_t *locale, struct charmap_t *charmap) { struct locale_messages_t *messages = locale->categories[LC_MESSAGES].messages; /* The fields YESSTR and NOSTR are optional. */ + if (messages->yesstr == NULL) + messages->yesstr = ""; + if (messages->nostr == NULL) + messages->nostr = ""; + if (messages->yesexpr == NULL) { if (!be_quiet) - error (0, 0, _("field `%s' in category `%s' undefined"), - "yesexpr", "LC_MESSAGES"); + error (0, 0, _("%s: field `%s' undefined"), "LC_MESSAGES", "yesexpr"); + messages->yesexpr = ""; + } + else if (messages->yesexpr[0] == '\0') + { + if (!be_quiet) + error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_MESSAGES", "yesexpr"); } else { int result; regex_t re; - /* Test whether it is a correct regular expression. */ + /* Test whether it are correct regular expressions. 
*/ result = regcomp (&re, messages->yesexpr, REG_EXTENDED); if (result != 0 && !be_quiet) { @@ -100,23 +97,32 @@ messages_finish (struct localedef_t *locale) (void) regerror (result, &re, errbuf, BUFSIZ); error (0, 0, _("\ -no correct regular expression for field `%s' in category `%s': %s"), - "yesexpr", "LC_MESSAGES", errbuf); +%s: no correct regular expression for field `%s': %s"), + "LC_MESSAGES", "yesexpr", errbuf); } + else if (result != 0) + regfree (&re); } if (messages->noexpr == NULL) { if (!be_quiet) - error (0, 0, _("field `%s' in category `%s' undefined"), - "noexpr", "LC_MESSAGES"); + error (0, 0, _("%s: field `%s' undefined"), "LC_MESSAGES", "noexpr"); + messages->noexpr = ""; + } + else if (messages->noexpr[0] == '\0') + { + if (!be_quiet) + error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_MESSAGES", "noexpr"); } else { int result; regex_t re; - /* Test whether it is a correct regular expression. */ + /* Test whether it are correct regular expressions. 
*/ result = regcomp (&re, messages->noexpr, REG_EXTENDED); if (result != 0 && !be_quiet) { @@ -124,33 +130,26 @@ no correct regular expression for field `%s' in category `%s': %s"), (void) regerror (result, &re, errbuf, BUFSIZ); error (0, 0, _("\ -no correct regular expression for field `%s' in category `%s': %s"), - "noexpr", "LC_MESSAGES", errbuf); +%s: no correct regular expression for field `%s': %s"), + "LC_MESSAGES", "noexpr", errbuf); } + else if (result != 0) + regfree (&re); } } void -messages_output (struct localedef_t *locale, const char *output_path) +messages_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) { struct locale_messages_t *messages = locale->categories[LC_MESSAGES].messages; struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)]; struct locale_file data; - u_int32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)]; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)]; size_t cnt = 0; - if ((locale->binary & (1 << LC_MESSAGES)) != 0) - { - iov[0].iov_base = messages; - iov[0].iov_len = locale->len[LC_MESSAGES]; - - write_locale_data (output_path, "LC_MESSAGES", 1, iov); - - return; - } - data.magic = LIMAGIC (LC_MESSAGES); data.n = _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES); iov[cnt].iov_base = (void *) &data; @@ -162,22 +161,22 @@ messages_output (struct localedef_t *locale, const char *output_path) ++cnt; idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; - iov[cnt].iov_base = (void *) (messages->yesexpr ?: ""); + iov[cnt].iov_base = (char *) messages->yesexpr; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = (void *) (messages->noexpr ?: ""); + iov[cnt].iov_base = (char *) messages->noexpr; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = (void *) (messages->yesstr ?: ""); + iov[cnt].iov_base = (char *) messages->yesstr; iov[cnt].iov_len = strlen 
(iov[cnt].iov_base) + 1; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = (void *) (messages->nostr ?: ""); + iov[cnt].iov_base = (char *) messages->nostr; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; assert (cnt + 1 == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)); @@ -187,61 +186,112 @@ messages_output (struct localedef_t *locale, const char *output_path) } +/* The parser for the LC_MESSAGES section of the locale definition. */ void -messages_add (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, struct token *code, - struct charset_t *charset) +messages_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) { - struct locale_messages_t *messages - = locale->categories[LC_MESSAGES].messages; + struct repertoire_t *repertoire = NULL; + struct locale_messages_t *messages; + struct token *now; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); - switch (tok) + /* The rest of the line containing `LC_MESSAGES' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do { - case tok_yesexpr: - if (code->val.str.start == NULL) - { - lr_error (lr, _("unknown character in field `%s' of category `%s'"), - "yesexpr", "LC_MESSAGES"); - messages->yesexpr = ""; - } - else - messages->yesexpr = code->val.str.start; - break; + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); - case tok_noexpr: - if (code->val.str.start == NULL) - { - lr_error (lr, _("unknown character in field `%s' of category `%s'"), - "noexpr", "LC_MESSAGES"); - messages->noexpr = ""; - } - else - messages->noexpr = code->val.str.start; - break; + /* If we see `copy' now we are almost done. 
*/ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_messages, LC_MESSAGES, + "LC_MESSAGES", ignore_content); + return; + } - case tok_yesstr: - if (code->val.str.start == NULL) + /* Prepare the data structures. */ + messages_startup (ldfile, result, ignore_content); + messages = result->categories[LC_MESSAGES].messages; + + while (1) + { + struct token *arg; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) { - lr_error (lr, _("unknown character in field `%s' of category `%s'"), - "yesstr", "LC_MESSAGES"); - messages->yesstr = ""; + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; } - else - messages->yesstr = code->val.str.start; - break; - case tok_nostr: - if (code->val.str.start == NULL) + switch (nowtok) { - lr_error (lr, _("unknown character in field `%s' of category `%s'"), - "nostr", "LC_MESSAGES"); - messages->nostr = ""; +#define STR_ELEM(cat) \ + case tok_##cat: \ + if (messages->cat != NULL) \ + { \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_MESSAGES", #cat); \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + now = lr_token (ldfile, charmap, repertoire); \ + if (now->tok != tok_string) \ + goto syntax_error; \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_MESSAGES", #cat); \ + messages->cat = ""; \ + } \ + else if (!ignore_content) \ + messages->cat = now->val.str.startmb; \ + break + + STR_ELEM (yesexpr); + STR_ELEM (noexpr); + STR_ELEM (yesstr); + STR_ELEM (nostr); + + case tok_end: + /* Next we assume `LC_MESSAGES'. 
*/ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_MESSAGES"); + else if (arg->tok != tok_lc_messages) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_MESSAGES"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_messages); + return; + + default: + syntax_error: + SYNTAX_ERROR (_("%s: syntax error"), "LC_MESSAGES"); } - else - messages->nostr = code->val.str.start; - break; - default: - assert (! "unknown token in category `LC_MESSAGES': should not happen"); + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_MESSAGES"); } diff --git a/locale/programs/ld-monetary.c b/locale/programs/ld-monetary.c index b903d63..61f9f8d 100644 --- a/locale/programs/ld-monetary.c +++ b/locale/programs/ld-monetary.c @@ -1,6 +1,6 @@ -/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -21,26 +21,22 @@ # include <config.h> #endif +#include <byteswap.h> #include <langinfo.h> #include <limits.h> #include <stdlib.h> #include <string.h> -#include <libintl.h> +#include <sys/uio.h> - -/* Undefine following line in production version. 
*/ -/* #define NDEBUG 1 */ #include <assert.h> -#include "locales.h" +#include "linereader.h" +#include "localedef.h" #include "localeinfo.h" -#include "stringtrans.h" - -extern void *xmalloc (size_t __n); -extern void *xrealloc (void *__ptr, size_t __n); +#include "locfile.h" -/* The real definition of the struct for the LC_NUMERIC locale. */ +/* The real definition of the struct for the LC_MONETARY locale. */ struct locale_monetary_t { const char *int_curr_symbol; @@ -48,8 +44,7 @@ struct locale_monetary_t const char *mon_decimal_point; const char *mon_thousands_sep; char *mon_grouping; - size_t mon_grouping_max; - size_t mon_grouping_act; + size_t mon_grouping_len; const char *positive_sign; const char *negative_sign; signed char int_frac_digits; @@ -60,10 +55,38 @@ struct locale_monetary_t signed char n_sep_by_space; signed char p_sign_posn; signed char n_sign_posn; + signed char int_p_cs_precedes; + signed char int_p_sep_by_space; + signed char int_n_cs_precedes; + signed char int_n_sep_by_space; + signed char int_p_sign_posn; + signed char int_n_sign_posn; + const char *duo_int_curr_symbol; + const char *duo_currency_symbol; + signed char duo_int_frac_digits; + signed char duo_frac_digits; + signed char duo_p_cs_precedes; + signed char duo_p_sep_by_space; + signed char duo_n_cs_precedes; + signed char duo_n_sep_by_space; + signed char duo_p_sign_posn; + signed char duo_n_sign_posn; + signed char duo_int_p_cs_precedes; + signed char duo_int_p_sep_by_space; + signed char duo_int_n_cs_precedes; + signed char duo_int_n_sep_by_space; + signed char duo_int_p_sign_posn; + signed char duo_int_n_sign_posn; + uint32_t uno_valid_from; + uint32_t uno_valid_to; + uint32_t duo_valid_from; + uint32_t duo_valid_to; + uint32_t conversion_rate[2]; + uint32_t conversion_rate_ob[2]; }; -/* The contents of the field int_curr_symbol have to be taken from +/* The content of the field int_curr_symbol has to be taken from ISO-4217. We test for correct values. 
*/ #define DEFINE_INT_CURR(str) str, static const char *const valid_int_curr[] = @@ -76,52 +99,73 @@ static const char *const valid_int_curr[] = /* Prototypes for local functions. */ -static int curr_strcmp(const char *s1, const char **s2); +static int curr_strcmp (const char *s1, const char **s2); -void +static void monetary_startup (struct linereader *lr, struct localedef_t *locale, - struct charset_t *charset) + int ignore_content) { - struct locale_monetary_t *monetary; - - /* We have a definition for LC_MONETARY. */ - copy_posix.mask &= ~(1 << LC_MONETARY); - - /* It is important that we always use UCS1 encoding for strings now. */ - encoding_method = ENC_UCS1; - - locale->categories[LC_MONETARY].monetary = monetary = - (struct locale_monetary_t *) xmalloc (sizeof (struct locale_monetary_t)); - - memset (monetary, '\0', sizeof (struct locale_monetary_t)); - - monetary->mon_grouping_max = 80; - monetary->mon_grouping = - (char *) xmalloc (monetary->mon_grouping_max); - monetary->mon_grouping_act = 0; + if (!ignore_content) + { + struct locale_monetary_t *monetary; + + locale->categories[LC_MONETARY].monetary = monetary = + (struct locale_monetary_t *) xmalloc (sizeof (*monetary)); + + memset (monetary, '\0', sizeof (struct locale_monetary_t)); + + monetary->mon_grouping = NULL; + monetary->mon_grouping_len = 0; + + monetary->int_frac_digits = -2; + monetary->frac_digits = -2; + monetary->p_cs_precedes = -2; + monetary->p_sep_by_space = -2; + monetary->n_cs_precedes = -2; + monetary->n_sep_by_space = -2; + monetary->p_sign_posn = -2; + monetary->n_sign_posn = -2; + monetary->int_p_cs_precedes = -2; + monetary->int_p_sep_by_space = -2; + monetary->int_n_cs_precedes = -2; + monetary->int_n_sep_by_space = -2; + monetary->int_p_sign_posn = -2; + monetary->int_n_sign_posn = -2; + monetary->duo_int_frac_digits = -2; + monetary->duo_frac_digits = -2; + monetary->duo_p_cs_precedes = -2; + monetary->duo_p_sep_by_space = -2; + monetary->duo_n_cs_precedes = -2; + 
monetary->duo_n_sep_by_space = -2; + monetary->duo_p_sign_posn = -2; + monetary->duo_n_sign_posn = -2; + monetary->duo_int_p_cs_precedes = -2; + monetary->duo_int_p_sep_by_space = -2; + monetary->duo_int_n_cs_precedes = -2; + monetary->duo_int_n_sep_by_space = -2; + monetary->duo_int_p_sign_posn = -2; + monetary->duo_int_n_sign_posn = -2; + } - monetary->int_frac_digits = -2; - monetary->frac_digits = -2; - monetary->p_cs_precedes = -2; - monetary->p_sep_by_space = -2; - monetary->n_cs_precedes = -2; - monetary->n_sep_by_space = -2; - monetary->p_sign_posn = -2; - monetary->n_sign_posn = -2; + lr->translate_strings = 1; + lr->return_widestr = 0; } void -monetary_finish (struct localedef_t *locale) +monetary_finish (struct localedef_t *locale, struct charmap_t *charmap) { struct locale_monetary_t *monetary = locale->categories[LC_MONETARY].monetary; -#define TEST_ELEM(cat) \ +#define TEST_ELEM(cat) \ if (monetary->cat == NULL && !be_quiet) \ - error (0, 0, _("field `%s' in category `%s' undefined"), \ - #cat, "LC_MONETARY") + { \ + error (0, 0, _("%s: field `%s' not defined"), \ + "LC_MONETARY", #cat); \ + monetary->cat = ""; \ + } TEST_ELEM (int_curr_symbol); TEST_ELEM (currency_symbol); @@ -133,21 +177,21 @@ monetary_finish (struct localedef_t *locale) /* The international currency symbol must come from ISO 4217. 
*/ if (monetary->int_curr_symbol != NULL) { - if (strlen (monetary->int_curr_symbol) != 4 - && monetary->int_curr_symbol[0] != '\0') + if (strlen (monetary->int_curr_symbol) != 4) { if (!be_quiet) error (0, 0, _("\ -value of field `int_curr_symbol' in category `LC_MONETARY' has wrong length")); +%s: value of field `int_curr_symbol' has wrong length"), + "LC_MONETARY"); } - else if (monetary->int_curr_symbol[0] != '\0' - && bsearch (monetary->int_curr_symbol, valid_int_curr, - NR_VALID_INT_CURR, sizeof (const char *), - (comparison_fn_t) curr_strcmp) == NULL + else if (bsearch (monetary->int_curr_symbol, valid_int_curr, + NR_VALID_INT_CURR, sizeof (const char *), + (comparison_fn_t) curr_strcmp) == NULL && !be_quiet) error (0, 0, _("\ -value of field `int_curr_symbol' in category `LC_MONETARY' does \ -not correspond to a valid name in ISO 4217")); +%s: value of field `int_curr_symbol' does \ +not correspond to a valid name in ISO 4217"), + "LC_MONETARY"); } /* The decimal point must not be empty. 
This is not said explicitly @@ -156,27 +200,27 @@ not correspond to a valid name in ISO 4217")); if (monetary->mon_decimal_point[0] == '\0' && !be_quiet) { error (0, 0, _("\ -value for field `%s' in category `%s' must not be the empty string"), - "mon_decimal_point", "LC_MONETARY"); +%s: value for field `%s' must not be the empty string"), + "LC_MONETARY", "mon_decimal_point"); } - if (monetary->mon_grouping_act == 0 && !be_quiet) - error (0, 0, _("field `%s' in category `%s' undefined"), - "mon_grouping", "LC_MONETARY"); + if (monetary->mon_grouping_len == 0 && !be_quiet) + error (0, 0, _("%s: field `%s' not defined"), + "LC_MONETARY", "mon_grouping"); #undef TEST_ELEM -#define TEST_ELEM(cat, min, max) \ +#define TEST_ELEM(cat, min, max) \ if (monetary->cat == -2 && !be_quiet) \ - error (0, 0, _("field `%s' in category `%s' undefined"), \ - #cat, "LC_MONETARY"); \ + error (0, 0, _("%s: field `%s' not defined"), \ + "LC_MONETARY", #cat); \ else if ((monetary->cat < min || monetary->cat > max) && !be_quiet) \ error (0, 0, _("\ -value for field `%s' in category `%s' must be in range %d...%d"), \ - #cat, "LC_MONETARY", min, max) +%s: value for field `%s' must be in range %d...%d"), \ + "LC_MONETARY", #cat, min, max) #if 0 - /* The following two tests are not really necessary because all values - the variable could have are valid. */ +/* The following two test are not really necessary because all values + the variable could have are valid. */ TEST_ELEM (int_frac_digits, -128, 127); /* No range check. */ TEST_ELEM (frac_digits, -128, 127); /* No range check. */ #endif @@ -186,29 +230,78 @@ value for field `%s' in category `%s' must be in range %d...%d"), \ TEST_ELEM (n_sep_by_space, -1, 2); TEST_ELEM (p_sign_posn, -1, 4); TEST_ELEM (n_sign_posn, -1, 4); + + /* The non-POSIX.2 extensions are optional. 
*/ + if (monetary->duo_int_curr_symbol == NULL) + monetary->duo_int_curr_symbol = monetary->int_curr_symbol; + if (monetary->duo_currency_symbol == NULL) + monetary->duo_currency_symbol = monetary->currency_symbol; + + if (monetary->duo_int_frac_digits == -2) + monetary->duo_int_frac_digits = monetary->int_frac_digits; + if (monetary->duo_frac_digits == -2) + monetary->duo_frac_digits = monetary->frac_digits; + +#undef TEST_ELEM +#define TEST_ELEM(cat, alt, min, max) \ + if (monetary->cat == -2 && !be_quiet) \ + monetary->cat = monetary->alt; \ + else if ((monetary->cat < min || monetary->cat > max) && !be_quiet) \ + error (0, 0, _("\ +%s: value for field `%s' must be in range %d...%d"), \ + "LC_MONETARY", #cat, min, max) + + TEST_ELEM (int_p_cs_precedes, p_cs_precedes, -1, 1); + TEST_ELEM (int_p_sep_by_space, p_sep_by_space, -1, 2); + TEST_ELEM (int_n_cs_precedes, n_cs_precedes, -1, 1); + TEST_ELEM (int_n_sep_by_space, n_sep_by_space, -1, 2); + TEST_ELEM (int_p_sign_posn, p_sign_posn, -1, 4); + TEST_ELEM (int_n_sign_posn, n_sign_posn, -1, 4); + + TEST_ELEM (duo_p_cs_precedes, p_cs_precedes, -1, 1); + TEST_ELEM (duo_p_sep_by_space, p_sep_by_space, -1, 2); + TEST_ELEM (duo_n_cs_precedes, n_cs_precedes, -1, 1); + TEST_ELEM (duo_n_sep_by_space, n_sep_by_space, -1, 2); + TEST_ELEM (duo_int_p_cs_precedes, int_p_cs_precedes, -1, 1); + TEST_ELEM (duo_int_p_sep_by_space, int_p_sep_by_space, -1, 2); + TEST_ELEM (duo_int_n_cs_precedes, int_n_cs_precedes, -1, 1); + TEST_ELEM (duo_int_n_sep_by_space, int_n_sep_by_space, -1, 2); + TEST_ELEM (duo_p_sign_posn, p_sign_posn, -1, 4); + TEST_ELEM (duo_n_sign_posn, n_sign_posn, -1, 4); + TEST_ELEM (duo_int_p_sign_posn, int_p_sign_posn, -1, 4); + TEST_ELEM (duo_int_n_sign_posn, int_n_sign_posn, -1, 4); + + if (monetary->uno_valid_from == 0) + monetary->uno_valid_from = 10101; + if (monetary->uno_valid_to == 0) + monetary->uno_valid_to = 99991231; + if (monetary->duo_valid_from == 0) + monetary->duo_valid_from = 10101; + if 
(monetary->duo_valid_to == 0) + monetary->duo_valid_to = 99991231; + + if (monetary->conversion_rate[0] == 0) + { + monetary->conversion_rate[0] = 1; + monetary->conversion_rate[1] = 1; + } + + monetary->conversion_rate_ob[0] = bswap_32 (monetary->conversion_rate[0]); + monetary->conversion_rate_ob[1] = bswap_32 (monetary->conversion_rate[1]); } void -monetary_output (struct localedef_t *locale, const char *output_path) +monetary_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) { struct locale_monetary_t *monetary = locale->categories[LC_MONETARY].monetary; struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)]; struct locale_file data; - u_int32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)]; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)]; size_t cnt = 0; - if ((locale->binary & (1 << LC_MONETARY)) != 0) - { - iov[0].iov_base = monetary; - iov[0].iov_len = locale->len[LC_MONETARY]; - - write_locale_data (output_path, "LC_MONETARY", 1, iov); - - return; - } - data.magic = LIMAGIC (LC_MONETARY); data.n = _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY); iov[cnt].iov_base = (void *) &data; @@ -220,40 +313,37 @@ monetary_output (struct localedef_t *locale, const char *output_path) ++cnt; idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; - iov[cnt].iov_base = (void *) (monetary->int_curr_symbol ?: ""); + iov[cnt].iov_base = (void *) monetary->int_curr_symbol; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = (void *) (monetary->currency_symbol ?: ""); + iov[cnt].iov_base = (void *) monetary->currency_symbol; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = (void *) (monetary->mon_decimal_point ?: ""); + iov[cnt].iov_base = (void *) monetary->mon_decimal_point; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - 
iov[cnt].iov_base = (void *) (monetary->mon_thousands_sep ?: ""); + iov[cnt].iov_base = (void *) monetary->mon_thousands_sep; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = alloca (monetary->mon_grouping_act + 1); - iov[cnt].iov_len = monetary->mon_grouping_act + 1; - memcpy (iov[cnt].iov_base, monetary->mon_grouping, - monetary->mon_grouping_act); - ((char *) iov[cnt].iov_base)[monetary->mon_grouping_act] = '\0'; + iov[cnt].iov_base = monetary->mon_grouping; + iov[cnt].iov_len = monetary->mon_grouping_len; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = (void *) (monetary->positive_sign ?: ""); + iov[cnt].iov_base = (void *) monetary->positive_sign; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = (void *) (monetary->negative_sign ?: ""); + iov[cnt].iov_base = (void *) monetary->negative_sign; iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; ++cnt; @@ -295,109 +385,422 @@ monetary_output (struct localedef_t *locale, const char *output_path) idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; iov[cnt].iov_base = (void *) &monetary->n_sign_posn; iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->int_p_cs_precedes; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->int_p_sep_by_space; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->int_n_cs_precedes; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->int_n_sep_by_space; + iov[cnt].iov_len = 1; + ++cnt; - assert (cnt + 1 == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)); + idx[cnt - 2] = idx[cnt - 3] + 
iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->int_p_sign_posn; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->int_n_sign_posn; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = (void *) monetary->duo_int_curr_symbol; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) monetary->duo_currency_symbol; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_int_frac_digits; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_frac_digits; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_p_cs_precedes; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_p_sep_by_space; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_n_cs_precedes; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_n_sep_by_space; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_int_p_cs_precedes; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_int_p_sep_by_space; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_int_n_cs_precedes; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] 
= idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_int_n_sep_by_space; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_p_sign_posn; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_n_sign_posn; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_int_p_sign_posn; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_int_n_sign_posn; + iov[cnt].iov_len = 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->uno_valid_from; + iov[cnt].iov_len = 4; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->uno_valid_to; + iov[cnt].iov_len = 4; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_valid_from; + iov[cnt].iov_len = 4; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->duo_valid_to; + iov[cnt].iov_len = 4; + ++cnt; + +#if BYTE_ORDER == LITTLE_ENDIAN +# define conversion_rate_el conversion_rate +# define conversion_rate_eb conversion_rate_ob +#else +# define conversion_rate_el conversion_rate_ob +# define conversion_rate_eb conversion_rate +#endif + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->conversion_rate_el; + iov[cnt].iov_len = 8; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &monetary->conversion_rate_eb; + iov[cnt].iov_len = 8; + ++cnt; + + assert (cnt == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)); write_locale_data (output_path, "LC_MONETARY", 2 + _NL_ITEM_INDEX 
(_NL_NUM_LC_MONETARY), iov); } +static int +curr_strcmp (const char *s1, const char **s2) +{ + return strcmp (s1, *s2); +} + + +/* The parser for the LC_MONETARY section of the locale definition. */ void -monetary_add (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, struct token *code, - struct charset_t *charset) +monetary_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) { - struct locale_monetary_t *monetary - = locale->categories[LC_MONETARY].monetary; + struct repertoire_t *repertoire = NULL; + struct locale_monetary_t *monetary; + struct token *now; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_MONETARY' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_monetary, LC_MONETARY, + "LC_MONETARY", ignore_content); + return; + } + + /* Prepare the data structures. 
*/ + monetary_startup (ldfile, result, ignore_content); + monetary = result->categories[LC_MONETARY].monetary; - switch (tok) + while (1) { -#define STR_ELEM(cat) \ - case tok_##cat: \ - if (monetary->cat != NULL) \ - lr_error (lr, _("\ -field `%s' in category `%s' declared more than once"), \ - #cat, "LC_MONETARY"); \ - else if (code->val.str.start == NULL) \ - { \ - lr_error (lr, _("unknown character in field `%s' of category `%s'"),\ - #cat, "LC_MONETARY"); \ - monetary->cat = ""; \ - } \ - else \ - monetary->cat = code->val.str.start; \ - break - - STR_ELEM (int_curr_symbol); - STR_ELEM (currency_symbol); - STR_ELEM (mon_decimal_point); - STR_ELEM (mon_thousands_sep); - STR_ELEM (positive_sign); - STR_ELEM (negative_sign); - -#define INT_ELEM(cat) \ - case tok_##cat: \ - if (monetary->cat != -2) \ - lr_error (lr, _("\ -field `%s' in category `%s' declared more than once"), \ - #cat, "LC_MONETARY"); \ - else if (code->tok == tok_minus1) \ - monetary->cat = -1; \ - else \ - monetary->cat = code->val.num; \ - break - - INT_ELEM (int_frac_digits); - INT_ELEM (frac_digits); - INT_ELEM (p_cs_precedes); - INT_ELEM (p_sep_by_space); - INT_ELEM (n_cs_precedes); - INT_ELEM (n_sep_by_space); - INT_ELEM (p_sign_posn); - INT_ELEM (n_sign_posn); - - case tok_mon_grouping: - if (monetary->mon_grouping_act == monetary->mon_grouping_max) + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) { - monetary->mon_grouping_max *= 2; - monetary->mon_grouping = - (char *) xrealloc (monetary->mon_grouping, - monetary->mon_grouping_max); + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; } - if (monetary->mon_grouping[monetary->mon_grouping_act - 1] - == '\177') - lr_error (lr, _("\ -`-1' must be last entry in `%s' field in `%s' category"), - "mon_grouping", "LC_MONETARY"); - else + + switch (nowtok) { - if (code->tok == tok_minus1) - monetary->mon_grouping[monetary->mon_grouping_act++] = '\177'; - else if (code->val.num == 0) - /* A value of 0 disables grouping from here on but we must - not store a NUL character since this terminates the - string. Use something different which must not be used - otherwise. */ - monetary->mon_grouping[monetary->mon_grouping_act++] = '\377'; - else if (code->val.num > 126) - lr_error (lr, _("\ -values for field `%s' in category `%s' must be smaller than 127"), - "mon_grouping", "LC_MONETARY"); +#define STR_ELEM(cat) \ + case tok_##cat: \ + now = lr_token (ldfile, charmap, NULL); \ + if (now->tok != tok_string) \ + goto err_label; \ + else if (monetary->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_MONETARY", #cat); \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_MONETARY", #cat); \ + monetary->cat = ""; \ + } \ + else if (!ignore_content) \ + monetary->cat = now->val.str.startmb; \ + lr_ignore_rest (ldfile, 1); \ + break + + STR_ELEM (int_curr_symbol); + STR_ELEM (currency_symbol); + STR_ELEM (mon_decimal_point); + STR_ELEM (mon_thousands_sep); + STR_ELEM (positive_sign); + STR_ELEM (negative_sign); + STR_ELEM (duo_int_curr_symbol); + STR_ELEM (duo_currency_symbol); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + now = lr_token (ldfile, charmap, NULL); \ + if (now->tok != tok_minus1 && now->tok != tok_number) \ + goto err_label; \ + else if 
(monetary->cat != -2) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_MONETARY", #cat); \ + else if (!ignore_content) \ + monetary->cat = now->tok == tok_minus1 ? -1 : now->val.num; \ + break + + INT_ELEM (int_frac_digits); + INT_ELEM (frac_digits); + INT_ELEM (p_cs_precedes); + INT_ELEM (p_sep_by_space); + INT_ELEM (n_cs_precedes); + INT_ELEM (n_sep_by_space); + INT_ELEM (p_sign_posn); + INT_ELEM (n_sign_posn); + INT_ELEM (int_p_cs_precedes); + INT_ELEM (int_p_sep_by_space); + INT_ELEM (int_n_cs_precedes); + INT_ELEM (int_n_sep_by_space); + INT_ELEM (int_p_sign_posn); + INT_ELEM (int_n_sign_posn); + INT_ELEM (duo_int_frac_digits); + INT_ELEM (duo_frac_digits); + INT_ELEM (duo_p_cs_precedes); + INT_ELEM (duo_p_sep_by_space); + INT_ELEM (duo_n_cs_precedes); + INT_ELEM (duo_n_sep_by_space); + INT_ELEM (duo_p_sign_posn); + INT_ELEM (duo_n_sign_posn); + INT_ELEM (duo_int_p_cs_precedes); + INT_ELEM (duo_int_p_sep_by_space); + INT_ELEM (duo_int_n_cs_precedes); + INT_ELEM (duo_int_n_sep_by_space); + INT_ELEM (duo_int_p_sign_posn); + INT_ELEM (duo_int_n_sign_posn); + INT_ELEM (uno_valid_from); + INT_ELEM (uno_valid_to); + INT_ELEM (duo_valid_from); + INT_ELEM (duo_valid_to); + + case tok_mon_grouping: + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_minus1 && now->tok != tok_number) + goto err_label; else - monetary->mon_grouping[monetary->mon_grouping_act++] - = code->val.num; + { + size_t act = 0; + size_t max = 10; + char *grouping = ignore_content ? 
NULL : xmalloc (max); + + do + { + if (act + 1 >= max) + { + max *= 2; + grouping = xrealloc (grouping, max); + } + + if (act > 0 && grouping[act - 1] == '\177') + { + lr_error (ldfile, _("\ +%s: `-1' must be last entry in `%s' field"), + "LC_MONETARY", "mon_grouping"); + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_minus1) + { + if (!ignore_content) + grouping[act++] = '\177'; + } + else if (now->val.num == 0) + { + /* A value of 0 disables grouping from here on but + we must not store a NUL character since this + terminates the string. Use something different + which must not be used otherwise. */ + if (!ignore_content) + grouping[act++] = '\377'; + } + else if (now->val.num > 126) + lr_error (ldfile, _("\ +%s: values for field `%s' must be smaller than 127"), + "LC_MONETARY", "mon_grouping"); + else if (!ignore_content) + grouping[act++] = now->val.num; + + /* Next must be semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + break; + + now = lr_token (ldfile, charmap, NULL); + } + while (now->tok == tok_minus1 || now->tok == tok_number); + + if (now->tok != tok_eol) + goto err_label; + + if (!ignore_content) + { + grouping[act++] = '\0'; + + monetary->mon_grouping = xrealloc (grouping, act); + monetary->mon_grouping_len = act; + } + } + break; + + case tok_conversion_rate: + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_number) + goto err_label; + if (now->val.num == 0) + { + invalid_conversion_rate: + lr_error (ldfile, _("conversion rate valze cannot be zero")); + if (!ignore_content) + { + monetary->conversion_rate[0] = 1; + monetary->conversion_rate[1] = 1; + } + break; + } + if (!ignore_content) + monetary->conversion_rate[0] = now->val.num; + /* Next must be a semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + goto err_label; + /* And another number. 
*/ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_number) + goto err_label; + if (now->val.num == 0) + goto invalid_conversion_rate; + if (!ignore_content) + monetary->conversion_rate[1] = now->val.num; + /* The rest of the line must be empty. */ + lr_ignore_rest (ldfile, 1); + break; + + case tok_end: + /* Next we assume `LC_MONETARY'. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_MONETARY"); + else if (now->tok != tok_lc_monetary) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_MONETARY"); + lr_ignore_rest (ldfile, now->tok == tok_lc_monetary); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_MONETARY"); } - break; - default: - assert (! "unknown token in category `LC_MONETARY': should not happen"); + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } -} - -static int -curr_strcmp(const char *s1, const char **s2) -{ - return strcmp (s1, *s2); + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_MONETARY"); } diff --git a/locale/programs/ld-name.c b/locale/programs/ld-name.c new file mode 100644 index 0000000..85acb41 --- /dev/null +++ b/locale/programs/ld-name.c @@ -0,0 +1,276 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <string.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_NAME locale. */ +struct locale_name_t +{ + const char *name_fmt; + const char *name_gen; + const char *name_mr; + const char *name_mrs; + const char *name_miss; + const char *name_ms; +}; + + +static void +name_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_NAME].name = + (struct locale_name_t *) xcalloc (1, sizeof (struct locale_name_t)); + + lr->translate_strings = 1; + lr->return_widestr = 0; +} + + +void +name_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ + struct locale_name_t *name = locale->categories[LC_NAME].name; + + if (name->name_fmt == NULL) + { + error (0, 0, _("%s: field `%s' not defined"), "LC_NAME", "name_fmt"); + /* Use as the default value the value of the i18n locale. */ + name->name_fmt = "%p%t%g%t%m%t%f"; + } + else + { + /* We must check whether the format string contains only the + allowed escape sequences. */ + const char *cp = name->name_fmt; + + if (*cp == '\0') + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_NAME", "name_fmt"); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (*++cp == 'R') + /* Romanize-flag. 
*/ + ++cp; + if (strchr ("fFgGlomMpsSt", *cp) == NULL) + { + error (0, 0, _("\ +%s: invalid escape sequence in field `%s'"), + "LC_NAME", "name_fmt"); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (name->cat == NULL) \ + { \ + if (verbose) \ + error (0, 0, _("%s: field `%s' not defined"), "LC_NAME", #cat); \ + name->cat = ""; \ + } + + TEST_ELEM (name_gen); + TEST_ELEM (name_mr); + TEST_ELEM (name_mrs); + TEST_ELEM (name_miss); + TEST_ELEM (name_ms); +} + + +void +name_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ + struct locale_name_t *name = locale->categories[LC_NAME].name; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_NAME)]; + struct locale_file data; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_NAME)]; + size_t cnt = 0; + + data.magic = LIMAGIC (LC_NAME); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_NAME); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = (void *) name->name_fmt; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) name->name_gen; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) name->name_mr; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) name->name_mrs; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) name->name_miss; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) name->name_ms; + iov[cnt].iov_len = 
strlen (iov[cnt].iov_base) + 1; + ++cnt; + + assert (cnt == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_NAME)); + + write_locale_data (output_path, "LC_NAME", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_NAME), iov); +} + + +/* The parser for the LC_NAME section of the locale definition. */ +void +name_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_name_t *name; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_NAME' must be empty. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_name, LC_NAME, + "LC_NAME", ignore_content); + return; + } + + /* Prepare the data structures. */ + name_startup (ldfile, result, ignore_content); + name = result->categories[LC_NAME].name; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (name->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_NAME", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_NAME", #cat); \ + name->cat = ""; \ + } \ + else if (!ignore_content) \ + name->cat = arg->val.str.startmb; \ + break + + STR_ELEM (name_fmt); + STR_ELEM (name_gen); + STR_ELEM (name_mr); + STR_ELEM (name_mrs); + STR_ELEM (name_miss); + STR_ELEM (name_ms); + + case tok_end: + /* Next we assume `LC_NAME'. */ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_NAME"); + else if (arg->tok != tok_lc_name) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_NAME"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_name); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_NAME"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_NAME"); +} diff --git a/locale/programs/ld-numeric.c b/locale/programs/ld-numeric.c index 0e61481..3e51aba 100644 --- a/locale/programs/ld-numeric.c +++ b/locale/programs/ld-numeric.c @@ -1,6 +1,6 @@ -/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. 
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -21,21 +21,16 @@ # include <config.h> #endif -#include <alloca.h> #include <langinfo.h> #include <string.h> -#include <libintl.h> +#include <sys/uio.h> -/* Undefine following line in production version. */ -/* #define NDEBUG 1 */ #include <assert.h> -#include "locales.h" +#include "linereader.h" +#include "localedef.h" #include "localeinfo.h" -#include "stringtrans.h" - -void *xmalloc (size_t __n); -void *xrealloc (void *__ptr, size_t __n); +#include "locfile.h" /* The real definition of the struct for the LC_NUMERIC locale. */ @@ -44,43 +39,38 @@ struct locale_numeric_t const char *decimal_point; const char *thousands_sep; char *grouping; - size_t grouping_max; - size_t grouping_act; + size_t grouping_len; }; -void +static void numeric_startup (struct linereader *lr, struct localedef_t *locale, - struct charset_t *charset) + int ignore_content) { - struct locale_numeric_t *numeric; - - /* We have a definition for LC_NUMERIC. */ - copy_posix.mask &= ~(1 << LC_NUMERIC); - - /* It is important that we always use UCS1 encoding for strings now. 
*/ - encoding_method = ENC_UCS1; + if (!ignore_content) + { + struct locale_numeric_t *numeric; - locale->categories[LC_NUMERIC].numeric = numeric = - (struct locale_numeric_t *) xmalloc (sizeof (struct locale_numeric_t)); + locale->categories[LC_NUMERIC].numeric = numeric = + (struct locale_numeric_t *) xcalloc (1, sizeof (*numeric)); - memset (numeric, '\0', sizeof (struct locale_numeric_t)); + numeric->grouping = NULL; + numeric->grouping_len = 0; + } - numeric->grouping_max = 80; - numeric->grouping = (char *) xmalloc (numeric->grouping_max); - numeric->grouping_act = 0; + lr->translate_strings = 1; + lr->return_widestr = 0; } void -numeric_finish (struct localedef_t *locale) +numeric_finish (struct localedef_t *locale, struct charmap_t *charmap) { struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; #define TEST_ELEM(cat) \ if (numeric->cat == NULL && !be_quiet) \ - error (0, 0, _("field `%s' in category `%s' undefined"), \ - #cat, "LC_NUMERIC") + error (0, 0, _("%s: field `%s' not defined"), "LC_NUMERIC", #cat) TEST_ELEM (decimal_point); TEST_ELEM (thousands_sep); @@ -91,35 +81,25 @@ numeric_finish (struct localedef_t *locale) if (numeric->decimal_point[0] == '\0' && !be_quiet) { error (0, 0, _("\ -value for field `%s' in category `%s' must not be the empty string"), - "decimal_point", "LC_NUMERIC"); +%s: value for field `%s' must not be the empty string"), + "LC_NUMERIC", "decimal_point"); } - if (numeric->grouping_act == 0 && !be_quiet) - error (0, 0, _("field `%s' in category `%s' undefined"), - "grouping", "LC_NUMERIC"); + if (numeric->grouping_len == 0 && !be_quiet) + error (0, 0, _("%s: field `%s' not defined"), "LC_NUMERIC", "grouping"); } void -numeric_output (struct localedef_t *locale, const char *output_path) +numeric_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) { struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; struct iovec iov[2 + _NL_ITEM_INDEX 
(_NL_NUM_LC_NUMERIC)]; struct locale_file data; - u_int32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)]; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)]; size_t cnt = 0; - if ((locale->binary & (1 << LC_NUMERIC)) != 0) - { - iov[0].iov_base = numeric; - iov[0].iov_len = locale->len[LC_NUMERIC]; - - write_locale_data (output_path, "LC_NUMERIC", 1, iov); - - return; - } - data.magic = LIMAGIC (LC_NUMERIC); data.n = _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC); iov[cnt].iov_base = (void *) &data; @@ -141,10 +121,8 @@ numeric_output (struct localedef_t *locale, const char *output_path) ++cnt; idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; - iov[cnt].iov_base = alloca (numeric->grouping_act + 1); - iov[cnt].iov_len = numeric->grouping_act + 1; - memcpy (iov[cnt].iov_base, numeric->grouping, numeric->grouping_act); - ((char *) iov[cnt].iov_base)[numeric->grouping_act] = '\0'; + iov[cnt].iov_base = numeric->grouping; + iov[cnt].iov_len = numeric->grouping_len; assert (cnt + 1 == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)); @@ -153,66 +131,173 @@ numeric_output (struct localedef_t *locale, const char *output_path) } +/* The parser for the LC_NUMERIC section of the locale definition. */ void -numeric_add (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, struct token *code, - struct charset_t *charset) +numeric_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) { - struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; + struct repertoire_t *repertoire = NULL; + struct locale_numeric_t *numeric; + struct token *now; + enum token_t nowtok; - switch (tok) + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_NUMERIC' must be free. 
*/ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_numeric, LC_NUMERIC, + "LC_NUMERIC", ignore_content); + return; + } + + /* Prepare the data structures. */ + numeric_startup (ldfile, result, ignore_content); + numeric = result->categories[LC_NUMERIC].numeric; + + while (1) { -#define STR_ELEM(cat) \ - case tok_##cat: \ - if (numeric->cat != NULL) \ - lr_error (lr, _("\ -field `%s' in category `%s' declared more than once"), \ - #cat, "LC_NUMERIC"); \ - else if (code->val.str.start == NULL) \ - { \ - lr_error (lr, _("unknown character in field `%s' of category `%s'"),\ - #cat, "LC_NUMERIC"); \ - numeric->cat = ""; \ - } \ - else \ - numeric->cat = code->val.str.start; \ - break - - STR_ELEM (decimal_point); - STR_ELEM (thousands_sep); - - case tok_grouping: - if (numeric->grouping_act == numeric->grouping_max) + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) { - numeric->grouping_max *= 2; - numeric->grouping = (char *) xrealloc (numeric->grouping, - numeric->grouping_max); + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; } - if (numeric->grouping_act > 0 - && (numeric->grouping[numeric->grouping_act - 1] == '\177')) + + switch (nowtok) { - lr_error (lr, _("\ -`-1' must be last entry in `%s' field in `%s' category"), - "grouping", "LC_NUMERIC"); - --numeric->grouping_act; +#define STR_ELEM(cat) \ + case tok_##cat: \ + now = lr_token (ldfile, charmap, NULL); \ + if (now->tok != tok_string) \ + goto err_label; \ + if (numeric->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_NUMERIC", #cat); \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_NUMERIC", #cat); \ + numeric->cat = ""; \ + } \ + else if (!ignore_content) \ + numeric->cat = now->val.str.startmb; \ + break + + STR_ELEM (decimal_point); + STR_ELEM (thousands_sep); + + case tok_grouping: + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_minus1 && now->tok != tok_number) + goto err_label; + else + { + size_t act = 0; + size_t max = 10; + char *grouping = ignore_content ? NULL : xmalloc (max); + + do + { + if (act + 1 >= max) + { + max *= 2; + grouping = xrealloc (grouping, max); + } + + if (act > 0 && grouping[act - 1] == '\177') + { + lr_error (ldfile, _("\ +%s: `-1' must be last entry in `%s' field"), "LC_NUMERIC", "grouping"); + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_minus1) + { + if (!ignore_content) + grouping[act++] = '\177'; + } + else if (now->val.num == 0) + { + /* A value of 0 disables grouping from here on but + we must not store a NUL character since this + terminates the string. Use something different + which must not be used otherwise. 
*/ + if (!ignore_content) + grouping[act++] = '\377'; + } + else if (now->val.num > 126) + lr_error (ldfile, _("\ +%s: values for field `%s' must be smaller than 127"), + "LC_NUMERIC", "grouping"); + else if (!ignore_content) + grouping[act++] = now->val.num; + + /* Next must be semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + break; + + now = lr_token (ldfile, charmap, NULL); + } + while (now->tok == tok_minus1 || now->tok == tok_number); + + if (now->tok != tok_eol) + goto err_label; + + if (!ignore_content) + { + grouping[act++] = '\0'; + + numeric->grouping = xrealloc (grouping, act); + numeric->grouping_len = act; + } + } + break; + + case tok_end: + /* Next we assume `LC_NUMERIC'. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_NUMERIC"); + else if (now->tok != tok_lc_numeric) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_NUMERIC"); + lr_ignore_rest (ldfile, now->tok == tok_lc_numeric); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_NUMERIC"); } - if (code->tok == tok_minus1) - numeric->grouping[numeric->grouping_act++] = '\177'; - else if (code->val.num == 0) - /* A value of 0 disables grouping from here on but we must - not store a NUL character since this terminates the string. - Use something different which must not be used otherwise. */ - numeric->grouping[numeric->grouping_act++] = '\377'; - else if (code->val.num > 126) - lr_error (lr, _("\ -values for field `%s' in category `%s' must be smaller than 127"), - "grouping", "LC_NUMERIC"); - else - numeric->grouping[numeric->grouping_act++] = code->val.num; - break; - - default: - assert (! "unknown token in category `LC_NUMERIC': should not happen"); + /* Prepare for the next round. 
*/ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_NUMERIC"); } diff --git a/locale/programs/ld-paper.c b/locale/programs/ld-paper.c new file mode 100644 index 0000000..5d834eb --- /dev/null +++ b/locale/programs/ld-paper.c @@ -0,0 +1,235 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <byteswap.h> +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_PAPER locale. 
*/ +struct locale_paper_t +{ + uint32_t height; + uint32_t height_ob; + uint32_t width; + uint32_t width_ob; +}; + + +static void +paper_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_PAPER].paper = + (struct locale_paper_t *) xcalloc (1, sizeof (struct locale_paper_t)); + + lr->translate_strings = 1; + lr->return_widestr = 0; +} + + +void +paper_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ + struct locale_paper_t *paper = locale->categories[LC_PAPER].paper; + + if (paper->height == 0) + { + error (0, 0, _("%s: field `%s' not defined"), "LC_PAPER", "height"); + /* Use as default values the values from the i18n locale. */ + paper->height = 297; + } + paper->height_ob = bswap_32 (paper->height); + + if (paper->width == 0) + { + error (0, 0, _("%s: field `%s' not defined"), "LC_PAPER", "width"); + /* Use as default values the values from the i18n locale. */ + paper->width = 210; + } + paper->width_ob = bswap_32 (paper->width); +} + + +void +paper_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ + struct locale_paper_t *paper = locale->categories[LC_PAPER].paper; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_PAPER)]; + struct locale_file data; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_PAPER)]; + size_t cnt = 0; + + data.magic = LIMAGIC (LC_PAPER); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_PAPER); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define height_eb height_ob +# define height_el height +# define width_eb width_ob +# define width_el width +#else +# define height_eb height +# define height_el height_ob +# define width_eb width +# define width_el width_ob +#endif + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = &paper->height_eb; + 
iov[cnt].iov_len = 4; + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = &paper->height_el; + iov[cnt].iov_len = 4; + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = &paper->width_eb; + iov[cnt].iov_len = 4; + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = &paper->width_el; + iov[cnt].iov_len = 4; + ++cnt; + + assert (cnt == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_PAPER)); + + write_locale_data (output_path, "LC_PAPER", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_PAPER), iov); +} + + +/* The parser for the LC_PAPER section of the locale definition. */ +void +paper_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_paper_t *paper; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_PAPER' must be empty. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_paper, LC_PAPER, + "LC_PAPER", ignore_content); + return; + } + + /* Prepare the data structures. */ + paper_startup (ldfile, result, ignore_content); + paper = result->categories[LC_PAPER].paper; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define INT_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_number) \ + goto err_label; \ + else if (paper->cat != 0) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_PAPER", #cat); \ + else if (!ignore_content) \ + paper->cat = arg->val.num; \ + break + + INT_ELEM (height); + INT_ELEM (width); + + case tok_end: + /* Next we assume `LC_PAPER'. */ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_PAPER"); + else if (arg->tok != tok_lc_paper) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_PAPER"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_paper); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_PAPER"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_PAPER"); +} diff --git a/locale/programs/ld-telephone.c b/locale/programs/ld-telephone.c new file mode 100644 index 0000000..2d75fea --- /dev/null +++ b/locale/programs/ld-telephone.c @@ -0,0 +1,283 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_TELEPHONE locale. */ +struct locale_telephone_t +{ + const char *tel_int_fmt; + const char *tel_dom_fmt; + const char *int_select; + const char *int_prefix; +}; + + +static void +telephone_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_TELEPHONE].telephone = (struct locale_telephone_t *) + xcalloc (1, sizeof (struct locale_telephone_t)); + + lr->translate_strings = 1; + lr->return_widestr = 0; +} + + +void +telephone_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ + struct locale_telephone_t *telephone = + locale->categories[LC_TELEPHONE].telephone; + + if (telephone->tel_int_fmt == NULL) + { + error (0, 0, _("%s: field `%s' not defined"), + "LC_TELEPHONE", "tel_int_fmt"); + /* Use as the default value the value of the i18n locale. */ + telephone->tel_int_fmt = "+%c %a %l"; + } + else + { + /* We must check whether the format string contains only the + allowed escape sequences. 
*/ + const char *cp = telephone->tel_int_fmt; + + if (*cp == '\0') + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_TELEPHONE", "tel_int_fmt"); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (strchr ("aAlc", *++cp) == NULL) + { + error (0, 0, _("\ +%s: invalid escape sequence in field `%s'"), + "LC_TELEPHONE", "tel_int_fmt"); + break; + } + } + ++cp; + } + } + + if (telephone->tel_dom_fmt == NULL) + telephone->tel_dom_fmt = ""; + else if (telephone->tel_dom_fmt[0] != '\0') + { + /* We must check whether the format string contains only the + allowed escape sequences. */ + const char *cp = telephone->tel_dom_fmt; + + while (*cp != '\0') + { + if (*cp == '%') + { + if (strchr ("aAlc", *++cp) == NULL) + { + error (0, 0, _("%s: invalid escape sequence in field `%s'"), + "LC_TELEPHONE", "tel_dom_fmt"); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (telephone->cat == NULL) \ + { \ + if (verbose) \ + error (0, 0, _("%s: field `%s' not defined"), "LC_TELEPHONE", #cat); \ + telephone->cat = ""; \ + } + + TEST_ELEM (int_select); + TEST_ELEM (int_prefix); +} + + +void +telephone_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ + struct locale_telephone_t *telephone = + locale->categories[LC_TELEPHONE].telephone; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_TELEPHONE)]; + struct locale_file data; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_TELEPHONE)]; + size_t cnt = 0; + + data.magic = LIMAGIC (LC_TELEPHONE); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_TELEPHONE); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = (void *) telephone->tel_int_fmt; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) 
telephone->tel_dom_fmt; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) telephone->int_select; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) telephone->int_prefix; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + assert (cnt == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_TELEPHONE)); + + write_locale_data (output_path, "LC_TELEPHONE", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_TELEPHONE), iov); +} + + +/* The parser for the LC_TELEPHONE section of the locale definition. */ +void +telephone_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_telephone_t *telephone; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_TELEPHONE' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_telephone, LC_TELEPHONE, + "LC_TELEPHONE", ignore_content); + return; + } + + /* Prepare the data structures. */ + telephone_startup (ldfile, result, ignore_content); + telephone = result->categories[LC_TELEPHONE].telephone; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (telephone->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_TELEPHONE", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TELEPHONE", #cat); \ + telephone->cat = ""; \ + } \ + else if (!ignore_content) \ + telephone->cat = arg->val.str.startmb; \ + break + + STR_ELEM (tel_int_fmt); + STR_ELEM (tel_dom_fmt); + STR_ELEM (int_select); + STR_ELEM (int_prefix); + + case tok_end: + /* Next we assume `LC_TELEPHONE'. */ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_TELEPHONE"); + else if (arg->tok != tok_lc_telephone) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_TELEPHONE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_telephone); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_TELEPHONE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_TELEPHONE"); +} diff --git a/locale/programs/ld-time.c b/locale/programs/ld-time.c index c63d897..bae38fc 100644 --- a/locale/programs/ld-time.c +++ b/locale/programs/ld-time.c @@ -1,6 +1,6 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -21,25 +21,19 @@ # include <config.h> #endif +#include <byteswap.h> #include <langinfo.h> +#include <stdlib.h> #include <string.h> -#include <libintl.h> +#include <wchar.h> +#include <sys/uio.h> -/* Undefine following line in production version. */ -/* #define NDEBUG 1 */ #include <assert.h> -#include <stdlib.h> -#include "locales.h" +#include "linereader.h" +#include "localedef.h" #include "localeinfo.h" -#include "stringtrans.h" - -#define SWAPU32(w) \ - (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24)) - - -extern void *xmalloc (size_t __n); -extern void *xrealloc (void *__p, size_t __n); +#include "locfile.h" /* Entry describing an entry of the era specification. */ @@ -51,6 +45,8 @@ struct era_data int32_t stop_date[3]; const char *name; const char *format; + uint32_t *wname; + uint32_t *wformat; }; @@ -58,75 +54,135 @@ struct era_data struct locale_time_t { const char *abday[7]; - size_t cur_num_abday; + const uint32_t *wabday[7]; + const uint32_t *wabday_ob[7]; + int abday_defined; const char *day[7]; - size_t cur_num_day; + const uint32_t *wday[7]; + const uint32_t *wday_ob[7]; + int day_defined; const char *abmon[12]; - size_t cur_num_abmon; + const uint32_t *wabmon[12]; + const uint32_t *wabmon_ob[12]; + int abmon_defined; const char *mon[12]; - size_t cur_num_mon; + const uint32_t *wmon[12]; + const uint32_t *wmon_ob[12]; + int mon_defined; const char *am_pm[2]; - size_t cur_num_am_pm; + const uint32_t *wam_pm[2]; + const uint32_t *wam_pm_ob[2]; + int am_pm_defined; const char *d_t_fmt; + const uint32_t *wd_t_fmt; + const uint32_t *wd_t_fmt_ob; const char *d_fmt; + const uint32_t *wd_fmt; + const uint32_t *wd_fmt_ob; const char *t_fmt; + const uint32_t *wt_fmt; + const uint32_t *wt_fmt_ob; const char *t_fmt_ampm; + const uint32_t *wt_fmt_ampm; + const uint32_t *wt_fmt_ampm_ob; const char 
**era; - u_int32_t cur_num_era; + const uint32_t **wera; + const uint32_t **wera_ob; + uint32_t num_era; const char *era_year; + const uint32_t *wera_year; + const uint32_t *wera_year_ob; const char *era_d_t_fmt; + const uint32_t *wera_d_t_fmt; + const uint32_t *wera_d_t_fmt_ob; const char *era_t_fmt; + const uint32_t *wera_t_fmt; + const uint32_t *wera_t_fmt_ob; const char *era_d_fmt; + const uint32_t *wera_d_fmt; + const uint32_t *wera_d_fmt_ob; const char *alt_digits[100]; - u_int32_t cur_num_alt_digits; + const uint32_t *walt_digits[100]; + const uint32_t *walt_digits_ob[100]; + int alt_digits_defined; + unsigned char week_ndays; + uint32_t week_1stday; + unsigned char week_1stweek; + unsigned char first_weekday; + unsigned char first_workday; + unsigned char cal_direction; + const char *timezone; + const uint32_t *wtimezone; struct era_data *era_entries; struct era_data *era_entries_ob; }; -void -time_startup (struct linereader *lr, struct localedef_t *locale, - struct charset_t *charset) -{ - struct locale_time_t *time; - - /* We have a definition for LC_TIME. */ - copy_posix.mask &= ~(1 << LC_TIME); +/* This constant is used to represent an empty wide character string. */ +static const uint32_t empty_wstr[1] = { 0 }; - /* It is important that we always use UCS1 encoding for strings now. 
*/ - encoding_method = ENC_UCS1; - locale->categories[LC_TIME].time = time = - (struct locale_time_t *) xmalloc (sizeof (struct locale_time_t)); +static void +time_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_TIME].time = + (struct locale_time_t *) xcalloc (1, sizeof (struct locale_time_t)); - memset (time, '\0', sizeof (struct locale_time_t)); + lr->translate_strings = 1; + lr->return_widestr = 0; } void -time_finish (struct localedef_t *locale) +time_finish (struct localedef_t *locale, struct charmap_t *charmap) { struct locale_time_t *time = locale->categories[LC_TIME].time; + size_t cnt; + +#define TESTARR_ELEM(cat) \ + if (!time->cat##_defined && !be_quiet) \ + error (0, 0, _("%s: field `%s' not defined"), "LC_TIME", #cat); \ + else if (time->w##cat != NULL) \ + { \ + size_t n; \ + for (n = 0; n < sizeof (time->w##cat) / sizeof (time->w##cat[0]); ++n) \ + { \ + size_t len = wcslen ((wchar_t *) time->w##cat[n]) + 1; \ + uint32_t *wstr = (uint32_t *) xmalloc (len * sizeof (uint32_t)); \ + do \ + { \ + --len; \ + wstr[len] = bswap_32 (time->w##cat[n][len]); \ + } \ + while (len > 0); \ + time->w##cat##_ob[n] = wstr; \ + } \ + } + + TESTARR_ELEM (abday); + TESTARR_ELEM (day); + TESTARR_ELEM (abmon); + TESTARR_ELEM (mon); + TESTARR_ELEM (am_pm); -#define TESTARR_ELEM(cat, max) \ - if (time->cur_num_##cat == 0 && !be_quiet) \ - error (0, 0, _("field `%s' in category `%s' undefined"), \ - #cat, "LC_TIME"); \ - else if (time->cur_num_##cat != max && !be_quiet) \ - error (0, 0, _("field `%s' in category `%s' has not enough values"), \ - #cat, "LC_TIME") - - TESTARR_ELEM (abday, 7); - TESTARR_ELEM (day, 7); - TESTARR_ELEM (abmon, 12); - TESTARR_ELEM (mon, 12); - TESTARR_ELEM (am_pm, 2); - -#define TEST_ELEM(cat) \ +#define TEST_ELEM(cat) \ if (time->cat == NULL && !be_quiet) \ - error (0, 0, _("field `%s' in category `%s' undefined"), \ - #cat, "LC_TIME") + error (0, 0, _("%s: field 
`%s' not defined"), "LC_TIME", #cat); \ + else if (time->w##cat != NULL) \ + { \ + size_t len = wcslen ((wchar_t *) time->w##cat) + 1; \ + uint32_t *wstr = (uint32_t *) xmalloc (len * sizeof (uint32_t)); \ + do \ + { \ + --len; \ + wstr[len] = bswap_32 (time->w##cat[len]); \ + } \ + while (len > 0); \ + time->w##cat##_ob = wstr; \ + } TEST_ELEM (d_t_fmt); TEST_ELEM (d_fmt); @@ -135,21 +191,39 @@ time_finish (struct localedef_t *locale) /* According to C.Y.Alexis Cheng <alexis@vnet.ibm.com> the T_FMT_AMPM field is optional. */ if (time->t_fmt_ampm == NULL) - /* Use the 24h format as default. */ - time->t_fmt_ampm = time->t_fmt; + { + /* Use the 24h format as default. */ + time->t_fmt_ampm = time->t_fmt; + time->wt_fmt_ampm = time->wt_fmt; + time->wt_fmt_ampm_ob = time->wt_fmt_ob; + } + else + { + /* Convert the byte order. */ + size_t len = wcslen ((wchar_t *) time->wt_fmt_ampm) + 1; + uint32_t *wstr = (uint32_t *) xmalloc (len * sizeof (uint32_t)); + do + { + --len; + wstr[len] = bswap_32 (time->wt_fmt_ampm[len]); + } + while (len > 0); + time->wt_fmt_ampm_ob = wstr; + } /* Now process the era entries. 
*/ - if (time->cur_num_era != 0) + if (time->num_era != 0) { const int days_per_month[12] = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31 ,30, 31 }; size_t idx; + wchar_t *wstr; time->era_entries = - (struct era_data *) xmalloc (time->cur_num_era + (struct era_data *) xmalloc (time->num_era * sizeof (struct era_data)); - for (idx = 0; idx < time->cur_num_era; ++idx) + for (idx = 0; idx < time->num_era; ++idx) { size_t era_len = strlen (time->era[idx]); char *str = xmalloc ((era_len + 1 + 3) & ~3); @@ -161,9 +235,9 @@ time_finish (struct localedef_t *locale) if (*str != '+' && *str != '-') { if (!be_quiet) - error (0, 0, _("direction flag in string %d in `era' field" - " in category `%s' is not '+' nor '-'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: direction flag in string %d in `era' field" + " is not '+' nor '-'"), + "LC_TIME", idx + 1); /* Default arbitrarily to '+'. */ time->era_entries[idx].direction = '+'; } @@ -172,9 +246,9 @@ time_finish (struct localedef_t *locale) if (*++str != ':') { if (!be_quiet) - error (0, 0, _("direction flag in string %d in `era' field" - " in category `%s' is not a single character"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: direction flag in string %d in `era' field" + " is not a single character"), + "LC_TIME", idx + 1); (void) strsep (&str, ":"); } else @@ -185,17 +259,17 @@ time_finish (struct localedef_t *locale) if (endp == str) { if (!be_quiet) - error (0, 0, _("illegal number for offset in string %d in" - " `era' field in category `%s'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: invalid number for offset in string %d in" + " `era' field"), + "LC_TIME", idx + 1); (void) strsep (&str, ":"); } else if (*endp != ':') { if (!be_quiet) - error (0, 0, _("garbage at end of offset value in string %d in" - " `era' field in category `%s'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: garbage at end of offset value in" + " string %d in `era' field"), + "LC_TIME", idx + 1); (void) strsep (&str, ":"); } else @@ -241,19 +315,18 
@@ time_finish (struct localedef_t *locale) { invalid_start_date: if (!be_quiet) - error (0, 0, _("illegal starting date in string %d in" - " `era' field in category `%s'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: invalid starting date in string %d in" + " `era' field"), + "LC_TIME", idx + 1); (void) strsep (&str, ":"); } else if (*endp != ':') { garbage_start_date: if (!be_quiet) - error (0, 0, _("garbage at end of starting date " - "in string %d in `era' field " - "in category `%s'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: garbage at end of starting date " + "in string %d in `era' field "), + "LC_TIME", idx + 1); (void) strsep (&str, ":"); } else @@ -270,10 +343,9 @@ time_finish (struct localedef_t *locale) && time->era_entries[idx].start_date[2] == 29 && !__isleap (time->era_entries[idx].start_date[0]))) && !be_quiet) - error (0, 0, _("starting date is illegal in" - " string %d in `era' field in" - " category `%s'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: starting date is invalid in" + " string %d in `era' field"), + "LC_TIME", idx + 1); } } @@ -317,19 +389,18 @@ time_finish (struct localedef_t *locale) { invalid_stop_date: if (!be_quiet) - error (0, 0, _("illegal stopping date in string %d in" - " `era' field in category `%s'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: invalid stopping date in string %d in" + " `era' field"), + "LC_TIME", idx + 1); (void) strsep (&str, ":"); } else if (*endp != ':') { garbage_stop_date: if (!be_quiet) - error (0, 0, _("garbage at end of stopping date " - "in string %d in `era' field " - "in category `%s'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: garbage at end of stopping date " + "in string %d in `era' field"), + "LC_TIME", idx + 1); (void) strsep (&str, ":"); } else @@ -346,23 +417,19 @@ time_finish (struct localedef_t *locale) && time->era_entries[idx].stop_date[2] == 29 && !__isleap (time->era_entries[idx].stop_date[0]))) && !be_quiet) - error (0, 0, _("stopping date is illegal in" - " string %d 
in `era' field in" - " category `%s'"), - idx + 1, "LC_TIME"); + error (0, 0, _("%s: stopping date is invalid in" + " string %d in `era' field"), + "LC_TIME", idx + 1); } } if (str == NULL || *str == '\0') { if (!be_quiet) - error (0, 0, _("missing era name in string %d in `era' field" - " in category `%s'"), idx + 1, "LC_TIME"); - /* Make sure that name and format are adjacent strings - in memory. */ - time->era_entries[idx].name = "\0"; - time->era_entries[idx].format - = time->era_entries[idx].name + 1; + error (0, 0, _("%s: missing era name in string %d in `era'" + " field"), "LC_TIME", idx + 1); + time->era_entries[idx].name = + time->era_entries[idx].format = ""; } else { @@ -371,74 +438,149 @@ time_finish (struct localedef_t *locale) if (str == NULL || *str == '\0') { if (!be_quiet) - error (0, 0, _("missing era format in string %d in `era'" - " field in category `%s'"), - idx + 1, "LC_TIME"); - /* Make sure that name and format are adjacent strings - in memory. */ - time->era_entries[idx].name = "\0"; - time->era_entries[idx].format - = time->era_entries[idx].name + 1; + error (0, 0, _("%s: missing era format in string %d" + " in `era' field"), + "LC_TIME", idx + 1); + time->era_entries[idx].name = + time->era_entries[idx].format = ""; } else time->era_entries[idx].format = str; } + + /* Now generate the wide character name and format. */ + wstr = wcschr ((wchar_t *) time->wera, L':'); /* end direction */ + wstr = wstr ? wcschr (wstr, L':') : NULL; /* end offset */ + wstr = wstr ? wcschr (wstr, L':') : NULL; /* end start */ + wstr = wstr ? wcschr (wstr, L':') : NULL; /* end end */ + time->era_entries[idx].wname = (uint32_t *) wstr; + wstr = wstr ? wcschr (wstr, L':') : NULL; /* end name */ + time->era_entries[idx].wformat = (uint32_t *) wstr; } /* Construct the array for the other byte order. 
*/ time->era_entries_ob = - (struct era_data *) xmalloc (time->cur_num_era - * sizeof (struct era_data)); + (struct era_data *) xmalloc (time->num_era * sizeof (struct era_data)); - for (idx = 0; idx < time->cur_num_era; ++idx) + for (idx = 0; idx < time->num_era; ++idx) { time->era_entries_ob[idx].direction = - SWAPU32 (time->era_entries[idx].direction); + bswap_32 (time->era_entries[idx].direction); time->era_entries_ob[idx].offset = - SWAPU32 (time->era_entries[idx].offset); + bswap_32 (time->era_entries[idx].offset); time->era_entries_ob[idx].start_date[0] = - SWAPU32 (time->era_entries[idx].start_date[0]); + bswap_32 (time->era_entries[idx].start_date[0]); time->era_entries_ob[idx].start_date[1] = - SWAPU32 (time->era_entries[idx].start_date[1]); + bswap_32 (time->era_entries[idx].start_date[1]); time->era_entries_ob[idx].start_date[2] = - SWAPU32 (time->era_entries[idx].stop_date[2]); + bswap_32 (time->era_entries[idx].stop_date[2]); time->era_entries_ob[idx].stop_date[0] = - SWAPU32 (time->era_entries[idx].stop_date[0]); + bswap_32 (time->era_entries[idx].stop_date[0]); time->era_entries_ob[idx].stop_date[1] = - SWAPU32 (time->era_entries[idx].stop_date[1]); + bswap_32 (time->era_entries[idx].stop_date[1]); time->era_entries_ob[idx].stop_date[2] = - SWAPU32 (time->era_entries[idx].stop_date[2]); + bswap_32 (time->era_entries[idx].stop_date[2]); time->era_entries_ob[idx].name = time->era_entries[idx].name; time->era_entries_ob[idx].format = time->era_entries[idx].format; + if (time->era_entries[idx].wname != NULL) + { + size_t inner = (wcslen ((wchar_t *) time->era_entries[idx].wname) + + 1); + time->era_entries_ob[idx].wname = xmalloc (inner + * sizeof (uint32_t)); + do + time->era_entries_ob[idx].wname[inner - 1] + = bswap_32 (time->era_entries[idx].wname[inner - 1]); + while (inner-- > 0); + } + else + time->era_entries_ob[idx].wname = NULL; + if (time->era_entries[idx].wformat != NULL) + { + size_t inner + = wcslen ((wchar_t *) 
time->era_entries[idx].wformat) + 1; + time->era_entries_ob[idx].wformat = xmalloc (inner + * sizeof (uint32_t)); + do + time->era_entries_ob[idx].wformat[inner - 1] + = bswap_32 (time->era_entries[idx].wformat[inner - 1]); + while (inner-- > 0); + } + else + time->era_entries_ob[idx].wformat = NULL; } } + + if (time->week_ndays == 0) + time->week_ndays = 7; + + if (time->week_1stday == 0) + time->week_1stday = 19971130; + + if (time->week_1stweek > time->week_ndays) + error (0, 0, _("\ +%s: third operand for value of field `%s' must not be larger than %d"), + "LC_TIME", "week", 7); + + if (time->first_weekday == '\0') + /* The definition does not specify this so the default is used. */ + time->first_weekday = 1; + else if (time->first_weekday > time->week_ndays) + error (0, 0, _("\ +%s: values of field `%s' must not be larger than %d"), + "LC_TIME", "first_weekday", 7); + + if (time->first_workday == '\0') + /* The definition does not specify this so the default is used. */ + time->first_workday = 1; + else if (time->first_workday > time->week_ndays) + error (0, 0, _("\ +%s: values of field `%s' must not be larger than %d"), + "LC_TIME", "first_workday", 7); + + if (time->cal_direction == '\0') + /* The definition does not specify this so the default is used. */ + time->cal_direction = 1; + else if (time->cal_direction > 3) + error (0, 0, _("\ +%s: values for field `%s' must not be larger than 3"), + "LC_TIME", "cal_direction", 3); + + /* XXX We don't perform any tests on the timezone value since this is + simply useless, stupid $&$!@... */ + if (time->timezone == NULL) + time->timezone = ""; + + /* Generate alt digits in other byte order. 
*/ + for (cnt = 0; cnt < 100; ++cnt) + if (time->walt_digits[cnt] != NULL) + { + size_t len = wcslen ((wchar_t *) time->walt_digits[cnt]) + 1; + uint32_t *wstr = xmalloc (len * sizeof (uint32_t)); + do + wstr[len - 1] = bswap_32 (time->walt_digits[cnt][len - 1]); + while (len-- > 0); + time->walt_digits_ob[cnt] = wstr; + } } void -time_output (struct localedef_t *locale, const char *output_path) +time_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) { struct locale_time_t *time = locale->categories[LC_TIME].time; struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_TIME) - + time->cur_num_era - 1 - + time->cur_num_alt_digits - 1 - + 1 + (time->cur_num_era * 9 - 1) * 2 - + (time->cur_num_era == 0)]; + + time->num_era - 1 + + 3 * 99 + + 1 + (time->num_era * 10 - 1) * 2]; struct locale_file data; - u_int32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_TIME)]; - size_t cnt, last_idx, num; - - if ((locale->binary & (1 << LC_TIME)) != 0) - { - iov[0].iov_base = time; - iov[0].iov_len = locale->len[LC_TIME]; - - write_locale_data (output_path, "LC_TIME", 1, iov); - - return; - } + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_TIME)]; + size_t cnt, last_idx, num, n; + uint32_t num_era_eb; + uint32_t num_era_el; data.magic = LIMAGIC (LC_TIME); data.n = _NL_ITEM_INDEX (_NL_NUM_LC_TIME); @@ -516,7 +658,7 @@ time_output (struct localedef_t *locale, const char *output_path) last_idx = ++cnt; idx[1 + last_idx] = idx[last_idx]; - for (num = 0; num < time->cur_num_era; ++num, ++cnt) + for (num = 0; num < time->num_era; ++num, ++cnt) { iov[2 + cnt].iov_base = (void *) time->era[num]; iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1; @@ -537,7 +679,7 @@ time_output (struct localedef_t *locale, const char *output_path) ++last_idx; idx[1 + last_idx] = idx[last_idx]; - for (num = 0; num < time->cur_num_alt_digits; ++num, ++cnt) + for (num = 0; num < 100; ++num, ++cnt) { iov[2 + cnt].iov_base = (void *) (time->alt_digits[num] ?: ""); iov[2 + cnt].iov_len = 
strlen (iov[2 + cnt].iov_base) + 1; @@ -564,15 +706,23 @@ time_output (struct localedef_t *locale, const char *output_path) idx[last_idx] = (idx[last_idx] + 3) & ~3; ++cnt; - iov[2 + cnt].iov_base = (void *) &time->cur_num_alt_digits; - iov[2 + cnt].iov_len = sizeof (u_int32_t); + /* The `era' data in usable form. */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + num_era_eb = bswap_32 (time->num_era); + num_era_el = time->num_era; +#else + num_era_eb = time->num_era; + num_era_el = bswap_32 (time->num_era); +#endif + + iov[2 + cnt].iov_base = (void *) &num_era_eb; + iov[2 + cnt].iov_len = sizeof (uint32_t); idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; ++cnt; ++last_idx; - /* The `era' data in usable form. */ - iov[2 + cnt].iov_base = (void *) &time->cur_num_era; - iov[2 + cnt].iov_len = sizeof (u_int32_t); + iov[2 + cnt].iov_base = (void *) &num_era_el; + iov[2 + cnt].iov_len = sizeof (uint32_t); idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; ++cnt; ++last_idx; @@ -585,7 +735,7 @@ time_output (struct localedef_t *locale, const char *output_path) # define ERA_B2 time->era_entries_ob #endif idx[1 + last_idx] = idx[last_idx]; - for (num = 0; num < time->cur_num_era; ++num) + for (num = 0; num < time->num_era; ++num) { size_t l; @@ -596,10 +746,22 @@ time_output (struct localedef_t *locale, const char *output_path) iov[2 + cnt].iov_len = sizeof (int32_t); ++cnt; iov[2 + cnt].iov_base = (void *) &ERA_B1[num].start_date[0]; - iov[2 + cnt].iov_len = 3 * sizeof (int32_t); + iov[2 + cnt].iov_len = sizeof (int32_t); + ++cnt; + iov[2 + cnt].iov_base = (void *) &ERA_B1[num].start_date[1]; + iov[2 + cnt].iov_len = sizeof (int32_t); + ++cnt; + iov[2 + cnt].iov_base = (void *) &ERA_B1[num].start_date[2]; + iov[2 + cnt].iov_len = sizeof (int32_t); ++cnt; iov[2 + cnt].iov_base = (void *) &ERA_B1[num].stop_date[0]; - iov[2 + cnt].iov_len = 3 * sizeof (int32_t); + iov[2 + cnt].iov_len = sizeof (int32_t); + ++cnt; + iov[2 + cnt].iov_base = (void *) 
&ERA_B1[num].stop_date[1]; + iov[2 + cnt].iov_len = sizeof (int32_t); + ++cnt; + iov[2 + cnt].iov_base = (void *) &ERA_B1[num].stop_date[2]; + iov[2 + cnt].iov_len = sizeof (int32_t); ++cnt; l = (strchr (ERA_B1[num].format, '\0') - ERA_B1[num].name) + 1; @@ -611,11 +773,19 @@ time_output (struct localedef_t *locale, const char *output_path) idx[1 + last_idx] += 8 * sizeof (int32_t) + l; assert (idx[1 + last_idx] % 4 == 0); + + iov[2 + cnt].iov_base = (void *) ERA_B1[num].wname; + iov[2 + cnt].iov_len = ((wcschr ((wchar_t *) ERA_B1[cnt].wformat, L'\0') + - (wchar_t *) ERA_B1[num].wname + 1) + * sizeof (uint32_t)); + ++cnt; + + idx[1 + last_idx] += iov[2 + cnt].iov_len; } ++last_idx; - /* idx[1 + last_idx] = idx[last_idx]; */ - for (num = 0; num < time->cur_num_era; ++num) + idx[1 + last_idx] = idx[last_idx]; + for (num = 0; num < time->num_era; ++num) { size_t l; @@ -626,10 +796,22 @@ time_output (struct localedef_t *locale, const char *output_path) iov[2 + cnt].iov_len = sizeof (int32_t); ++cnt; iov[2 + cnt].iov_base = (void *) &ERA_B2[num].start_date[0]; - iov[2 + cnt].iov_len = 3 * sizeof (int32_t); + iov[2 + cnt].iov_len = sizeof (int32_t); + ++cnt; + iov[2 + cnt].iov_base = (void *) &ERA_B2[num].start_date[1]; + iov[2 + cnt].iov_len = sizeof (int32_t); + ++cnt; + iov[2 + cnt].iov_base = (void *) &ERA_B2[num].start_date[2]; + iov[2 + cnt].iov_len = sizeof (int32_t); ++cnt; iov[2 + cnt].iov_base = (void *) &ERA_B2[num].stop_date[0]; - iov[2 + cnt].iov_len = 3 * sizeof (int32_t); + iov[2 + cnt].iov_len = sizeof (int32_t); + ++cnt; + iov[2 + cnt].iov_base = (void *) &ERA_B2[num].stop_date[1]; + iov[2 + cnt].iov_len = sizeof (int32_t); + ++cnt; + iov[2 + cnt].iov_base = (void *) &ERA_B2[num].stop_date[2]; + iov[2 + cnt].iov_len = sizeof (int32_t); ++cnt; l = (strchr (ERA_B2[num].format, '\0') - ERA_B2[num].name) + 1; @@ -638,103 +820,618 @@ time_output (struct localedef_t *locale, const char *output_path) iov[2 + cnt].iov_len = l; ++cnt; - /* idx[1 + last_idx] += 8 
* sizeof (int32_t) + l; */ + idx[1 + last_idx] += 8 * sizeof (int32_t) + l; + + iov[2 + cnt].iov_base = (void *) ERA_B1[num].wname; + iov[2 + cnt].iov_len = ((wcschr ((wchar_t *) ERA_B1[cnt].wformat, L'\0') + - (wchar_t *) ERA_B1[num].wname + 1) + * sizeof (uint32_t)); + ++cnt; + + idx[1 + last_idx] += iov[2 + cnt].iov_len; } + ++last_idx; - /* We have a problem when no era data is present. In this case the - data pointer for _NL_TIME_ERA_ENTRIES_EB and - _NL_TIME_ERA_ENTRIES_EL point after the end of the file. So we - introduce some dummy data here. */ - if (time->cur_num_era == 0) +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define WABDAY_B1 wabday_ob +# define WDAY_B1 wday_ob +# define WABMON_B1 wabmon_ob +# define WMON_B1 wmon_ob +# define WAM_PM_B1 wam_pm_ob +# define WD_T_FMT_B1 wd_t_fmt_ob +# define WD_FMT_B1 wd_fmt_ob +# define WT_FMT_B1 wt_fmt_ob +# define WT_FMT_AMPM_B1 wt_fmt_ampm_ob +# define WERA_YEAR_B1 wera_year_ob +# define WERA_D_FMT_B1 wera_d_fmt_ob +# define WALT_DIGITS_B1 walt_digits_ob +# define WERA_D_T_FMT_B1 wera_d_t_fmt_ob +# define WERA_T_FMT_B1 wera_t_fmt_ob +# define WABDAY_B2 wabday +# define WDAY_B2 wday +# define WABMON_B2 wabmon +# define WMON_B2 wmon +# define WAM_PM_B2 wam_pm +# define WD_T_FMT_B2 wd_t_fmt +# define WD_FMT_B2 wd_fmt +# define WT_FMT_B2 wt_fmt +# define WT_FMT_AMPM_B2 wt_fmt_ampm +# define WERA_YEAR_B2 wera_year +# define WERA_D_FMT_B2 wera_d_fmt +# define WALT_DIGITS_B2 walt_digits +# define WERA_D_T_FMT_B2 wera_d_t_fmt +# define WERA_T_FMT_B2 wera_t_fmt +#else +# define WABDAY_B1 wabday +# define WDAY_B1 wday +# define WABMON_B1 wabmon +# define WMON_B1 wmon +# define WAM_PM_B1 wam_pm +# define WD_T_FMT_B1 wd_t_fmt +# define WD_FMT_B1 wd_fmt +# define WT_FMT_B1 wt_fmt +# define WT_FMT_AMPM_B1 wt_fmt_ampm +# define WERA_YEAR_B1 wera_year +# define WERA_D_FMT_B1 wera_d_fmt +# define WALT_DIGITS_B1 walt_digits +# define WERA_D_T_FMT_B1 wera_d_t_fmt +# define WERA_T_FMT_B1 wera_t_fmt +# define WABDAY_B2 wabday_ob +# define 
WDAY_B2 wday_ob +# define WABMON_B2 wabmon_ob +# define WMON_B2 wmon_ob +# define WAM_PM_B2 wam_pm_ob +# define WD_T_FMT_B2 wd_t_fmt_ob +# define WD_FMT_B2 wd_fmt_ob +# define WT_FMT_B2 wt_fmt_ob +# define WT_FMT_AMPM_B2 wt_fmt_ampm_ob +# define WERA_YEAR_B2 wera_year_ob +# define WERA_D_FMT_B2 wera_d_fmt_ob +# define WALT_DIGITS_B2 walt_digits_ob +# define WERA_D_T_FMT_B2 wera_d_t_fmt_ob +# define WERA_T_FMT_B2 wera_t_fmt_ob +#endif + + /* The wide character ab'days. */ + for (n = 0; n < 7; ++n, ++cnt, ++last_idx) { - static u_int32_t dummy = 0; - iov[2 + cnt].iov_base = (void *) &dummy; - iov[2 + cnt].iov_len = 4; - ++cnt; + iov[2 + cnt].iov_base = + (void *) (time->WABDAY_B1[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + for (n = 0; n < 7; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WABDAY_B2[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + + /* The wide character days. */ + for (n = 0; n < 7; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WDAY_B1[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + for (n = 0; n < 7; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WDAY_B2[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + + /* The wide character ab'mons. 
*/ + for (n = 0; n < 12; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WABMON_B1[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + for (n = 0; n < 12; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WABMON_B2[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + + /* The wide character mons. */ + for (n = 0; n < 12; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WMON_B1[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + for (n = 0; n < 12; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WMON_B2[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + + /* Wide character AM/PM. 
*/ + for (n = 0; n < 2; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WAM_PM_B1[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + for (n = 0; n < 2; ++n, ++cnt, ++last_idx) + { + iov[2 + cnt].iov_base = + (void *) (time->WAM_PM_B2[n] ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + } + + iov[2 + cnt].iov_base = (void *) (time->WD_T_FMT_B1 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WD_FMT_B1 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WT_FMT_B1 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WT_FMT_AMPM_B1 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WD_T_FMT_B2 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WD_FMT_B2 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + 
+ iov[2 + cnt].iov_base = (void *) (time->WT_FMT_B2 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WT_FMT_AMPM_B2 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WERA_YEAR_B2 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WERA_D_FMT_B2 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + idx[1 + last_idx] = idx[last_idx]; + for (num = 0; num < 100; ++num, ++cnt) + { + iov[2 + cnt].iov_base = (void *) (time->WALT_DIGITS_B2[num] + ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] += iov[2 + cnt].iov_len; } + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WERA_D_T_FMT_B2 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WERA_T_FMT_B2 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WERA_YEAR_B1 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + 
++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WERA_D_FMT_B1 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + idx[1 + last_idx] = idx[last_idx]; + for (num = 0; num < 100; ++num, ++cnt) + { + iov[2 + cnt].iov_base = (void *) (time->WALT_DIGITS_B1[num] + ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] += iov[2 + cnt].iov_len; + } + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WERA_D_T_FMT_B1 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) (time->WERA_T_FMT_B1 ?: empty_wstr); + iov[2 + cnt].iov_len = ((wcslen (iov[2 + cnt].iov_base) + 1) + * sizeof (uint32_t)); + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) &time->week_ndays; + iov[2 + cnt].iov_len = 1; + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) &time->week_1stday; + iov[2 + cnt].iov_len = 1; + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) &time->week_1stweek; + iov[2 + cnt].iov_len = 1; + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) &time->first_weekday; + iov[2 + cnt].iov_len = 1; + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) &time->first_workday; + iov[2 + cnt].iov_len = 1; + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) &time->cal_direction; + iov[2 + cnt].iov_len = 1; 
+ idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; + + iov[2 + cnt].iov_base = (void *) time->timezone; + iov[2 + cnt].iov_len = strlen (time->timezone) + 1; + idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len; + ++cnt; + ++last_idx; assert (cnt == (_NL_ITEM_INDEX (_NL_NUM_LC_TIME) - + time->cur_num_era - 1 - + time->cur_num_alt_digits - 1 - + 1 + (time->cur_num_era * 9 - 1) * 2 - + (time->cur_num_era == 0)) - && last_idx + 1 == _NL_ITEM_INDEX (_NL_NUM_LC_TIME)); + + time->num_era - 1 + + 3 * 99 + + 1 + (time->num_era * 10 - 1) * 2)); + assert (last_idx == _NL_ITEM_INDEX (_NL_NUM_LC_TIME)); write_locale_data (output_path, "LC_TIME", 2 + cnt, iov); } +/* The parser for the LC_TIME section of the locale definition. */ void -time_add (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, struct token *code, - struct charset_t *charset) +time_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) { - struct locale_time_t *time = locale->categories[LC_TIME].time; + struct repertoire_t *repertoire = NULL; + struct locale_time_t *time; + struct token *now; + enum token_t nowtok; + size_t cnt; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_TIME' must be free. 
*/ + lr_ignore_rest (ldfile, 1); - switch (tok) + + do { -#define STRARR_ELEM(cat, max) \ - case tok_##cat: \ - if (time->cur_num_##cat >= max) \ - lr_error (lr, _("\ -too many values for field `%s' in category `%s'"), \ - #cat, "LC_TIME"); \ - else if (code->val.str.start == NULL) \ - { \ - lr_error (lr, _("unknown character in field `%s' of category `%s'"),\ - #cat, "LC_TIME"); \ - time->cat[time->cur_num_##cat++] = ""; \ - } \ - else \ - time->cat[time->cur_num_##cat++] = code->val.str.start; \ - break - - STRARR_ELEM (abday, 7); - STRARR_ELEM (day, 7); - STRARR_ELEM (abmon, 12); - STRARR_ELEM (mon, 12); - STRARR_ELEM (am_pm, 2); - STRARR_ELEM (alt_digits, 100); - - case tok_era: - if (code->val.str.start == NULL) - lr_error (lr, _("unknown character in field `%s' of category `%s'"), - "era", "LC_TIME"); - else + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_time, LC_TIME, + "LC_TIME", ignore_content); + return; + } + + /* Prepare the data structures. */ + time_startup (ldfile, result, ignore_content); + time = result->categories[LC_TIME].time; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) { - ++time->cur_num_era; - time->era = xrealloc (time->era, - time->cur_num_era * sizeof (char *)); - time->era[time->cur_num_era - 1] = code->val.str.start; + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; } - break; - -#define STR_ELEM(cat) \ - case tok_##cat: \ - if (time->cat != NULL) \ - lr_error (lr, _("\ -field `%s' in category `%s' declared more than once"), \ - #cat, "LC_TIME"); \ - else if (code->val.str.start == NULL) \ - { \ - lr_error (lr, _("unknown character in field `%s' of category `%s'"),\ - #cat, "LC_TIME"); \ - time->cat = ""; \ - } \ - else \ - time->cat = code->val.str.start; \ - break - - STR_ELEM (d_t_fmt); - STR_ELEM (d_fmt); - STR_ELEM (t_fmt); - STR_ELEM (t_fmt_ampm); - STR_ELEM (era_year); - STR_ELEM (era_d_t_fmt); - STR_ELEM (era_d_fmt); - STR_ELEM (era_t_fmt); - - default: - assert (! "unknown token in category `LC_TIME': should not happen"); + + switch (nowtok) + { +#define STRARR_ELEM(cat, min, max) \ + case tok_##cat: \ + for (cnt = 0; cnt < max; ++cnt) \ + { \ + now = lr_token (ldfile, charmap, repertoire); \ + if (now->tok == tok_eol) \ + { \ + if (cnt < min) \ + lr_error (ldfile, _("%s: too few values for field `%s'"), \ + "LC_TIME", #cat); \ + if (!ignore_content) \ + do \ + { \ + time->cat[cnt] = ""; \ + time->w##cat[cnt] = empty_wstr; \ + } \ + while (++cnt < max); \ + break; \ + } \ + else if (now->tok != tok_string) \ + goto err_label; \ + else if (!ignore_content && (now->val.str.startmb == NULL \ + || now->val.str.startwc == NULL)) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TIME", #cat); \ + time->cat[cnt] = ""; \ + time->w##cat[cnt] = empty_wstr; \ + } \ + else if (!ignore_content) \ + { \ + time->cat[cnt] = now->val.str.startmb; \ + time->w##cat[cnt] = now->val.str.startwc; \ + } \ + \ + /* Match the semicolon. 
*/ \ + now = lr_token (ldfile, charmap, NULL); \ + if (now->tok != tok_semicolon && now->tok != tok_eol) \ + break; \ + } \ + if (now->tok != tok_eol) \ + { \ + while (!ignore_content && cnt < min) \ + { \ + time->cat[cnt] = ""; \ + time->w##cat[cnt++] = empty_wstr; \ + } \ + \ + if (now->tok == tok_semicolon) \ + { \ + now = lr_token (ldfile, charmap, NULL); \ + if (now->tok == tok_eol) \ + lr_error (ldfile, _("extra trailing semicolon")); \ + else if (now->tok == tok_string) \ + { \ + lr_error (ldfile, _("\ +%s: too many values for field `%s'"), \ + "LC_TIME", #cat); \ + lr_ignore_rest (ldfile, 0); \ + } \ + else \ + goto err_label; \ + } \ + else \ + goto err_label; \ + } \ + time->cat##_defined = 1; \ + break + + STRARR_ELEM (abday, 7, 7); + STRARR_ELEM (day, 7, 7); + STRARR_ELEM (abmon, 12, 12); + STRARR_ELEM (mon, 12, 12); + STRARR_ELEM (am_pm, 2, 2); + STRARR_ELEM (alt_digits, 0, 100); + + case tok_era: + do + { + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_string) + goto err_label; + if (!ignore_content && (now->val.str.startmb == NULL + || now->val.str.startwc == NULL)) + { + lr_error (ldfile, _("%s: unknown character in field `%s'"), + "LC_TIME", "era"); + lr_ignore_rest (ldfile, 0); + break; + } + + if (!ignore_content) + { + time->era = xrealloc (time->era, + (time->num_era + 1) * sizeof (char *)); + time->era[time->num_era] = now->val.str.startmb; + + time->wera = xrealloc (time->wera, + (time->num_era + 1) + * sizeof (char *)); + time->wera[time->num_era++] = now->val.str.startwc; + } + + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_eof && now->tok != tok_semicolon) + goto err_label; + } + while (now->tok == tok_semicolon); + break; + +#define STR_ELEM(cat) \ + case tok_##cat: \ + now = lr_token (ldfile, charmap, NULL); \ + if (now->tok != tok_string) \ + goto err_label; \ + else if (time->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_TIME", #cat); \ + else if (!ignore_content 
&& (now->val.str.startmb == NULL \ + || now->val.str.startwc == NULL)) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TIME", #cat); \ + time->cat = ""; \ + time->w##cat = empty_wstr; \ + } \ + else if (!ignore_content) \ + { \ + time->cat = now->val.str.startmb; \ + time->w##cat = now->val.str.startwc; \ + } \ + break + + STR_ELEM (d_t_fmt); + STR_ELEM (d_fmt); + STR_ELEM (t_fmt); + STR_ELEM (t_fmt_ampm); + STR_ELEM (era_year); + STR_ELEM (era_d_t_fmt); + STR_ELEM (era_d_fmt); + STR_ELEM (era_t_fmt); + STR_ELEM (timezone); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + now = lr_token (ldfile, charmap, NULL); \ + if (now->tok != tok_number) \ + goto err_label; \ + else if (time->cat != 0) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_TIME", #cat); \ + else if (!ignore_content) \ + time->cat = now->val.num; \ + break + + INT_ELEM (first_weekday); + INT_ELEM (first_workday); + INT_ELEM (cal_direction); + + case tok_week: + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_number) + goto err_label; + time->week_ndays = now->val.num; + + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + goto err_label; + + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_number) + goto err_label; + time->week_1stday = now->val.num; + + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + goto err_label; + + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_number) + goto err_label; + time->week_1stweek = now->val.num; + + lr_ignore_rest (ldfile, 1); + break; + + case tok_end: + /* Next we assume `LC_TIME'. 
*/ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_TIME"); + else if (now->tok != tok_lc_time) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_TIME"); + lr_ignore_rest (ldfile, now->tok == tok_lc_time); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_TIME"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_TIME"); } diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c index 31278d6..99ed0f2 100644 --- a/locale/programs/linereader.c +++ b/locale/programs/linereader.c @@ -1,6 +1,6 @@ /* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -28,22 +28,20 @@ #include <stdlib.h> #include <string.h> +#include "charmap.h" #include "error.h" #include "linereader.h" -#include "charset.h" +#include "localedef.h" #include "stringtrans.h" -void *xmalloc (size_t __n); -void *xrealloc (void *__p, size_t __n); -char *xstrdup (const char *__str); - - +/* Prototypes for local functions. 
*/ static struct token *get_toplvl_escape (struct linereader *lr); static struct token *get_symname (struct linereader *lr); static struct token *get_ident (struct linereader *lr); static struct token *get_string (struct linereader *lr, - const struct charset_t *charset); + const struct charmap_t *charmap, + const struct repertoire_t *repertoire); struct linereader * @@ -126,9 +124,14 @@ lr_next (struct linereader *lr) if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n') { +#if 0 + /* XXX Is this correct? */ /* An escaped newline character is substituted with a single <SP>. */ --n; lr->buf[n - 1] = ' '; +#else + n -= 2; +#endif } lr->buf[n] = '\0'; @@ -149,7 +152,8 @@ extern char *program_name; struct token * -lr_token (struct linereader *lr, const struct charset_t *charset) +lr_token (struct linereader *lr, const struct charmap_t *charmap, + const struct repertoire_t *repertoire) { int ch; @@ -193,12 +197,29 @@ lr_token (struct linereader *lr, const struct charset_t *charset) return get_toplvl_escape (lr); /* Match ellipsis. */ - if (ch == '.' && strncmp (&lr->buf[lr->idx], "..", 2) == 0) + if (ch == '.') { - lr_getc (lr); - lr_getc (lr); - lr->token.tok = tok_ellipsis; - return &lr->token; + if (strncmp (&lr->buf[lr->idx], "...", 3) == 0) + { + lr_getc (lr); + lr_getc (lr); + lr_getc (lr); + lr->token.tok = tok_ellipsis4; + return &lr->token; + } + if (strncmp (&lr->buf[lr->idx], "..", 2) == 0) + { + lr_getc (lr); + lr_getc (lr); + lr->token.tok = tok_ellipsis3; + return &lr->token; + } + if (lr->buf[lr->idx] == '.') + { + lr_getc (lr); + lr->token.tok = tok_ellipsis2; + return &lr->token; + } } switch (ch) @@ -238,7 +259,7 @@ lr_token (struct linereader *lr, const struct charset_t *charset) return &lr->token; case '"': - return get_string (lr, charset); + return get_string (lr, charmap, repertoire); case '-': ch = lr_getc (lr); @@ -261,7 +282,7 @@ get_toplvl_escape (struct linereader *lr) /* This is supposed to be a numeric value. 
We return the numerical value and the number of bytes. */ size_t start_idx = lr->idx - 1; - unsigned int value = 0; + char *bytes = lr->token.val.charcode.bytes; int nbytes = 0; int ch; @@ -287,11 +308,11 @@ get_toplvl_escape (struct linereader *lr) || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) { esc_error: - lr->token.val.str.start = &lr->buf[start_idx]; + lr->token.val.str.startmb = &lr->buf[start_idx]; while (ch != EOF && !isspace (ch)) ch = lr_getc (lr); - lr->token.val.str.len = lr->idx - start_idx; + lr->token.val.str.lenmb = lr->idx - start_idx; lr->token.tok = tok_error; return &lr->token; @@ -300,7 +321,7 @@ get_toplvl_escape (struct linereader *lr) if (isdigit (ch)) byte = ch - '0'; else - byte = _tolower (ch) - 'a' + 10; + byte = tolower (ch) - 'a' + 10; ch = lr_getc (lr); if ((base == 16 && !isxdigit (ch)) @@ -311,7 +332,7 @@ get_toplvl_escape (struct linereader *lr) if (isdigit (ch)) byte += ch - '0'; else - byte += _tolower (ch) - 'a' + 10; + byte += tolower (ch) - 'a' + 10; ch = lr_getc (lr); if (base != 16 && isdigit (ch)) @@ -322,10 +343,7 @@ get_toplvl_escape (struct linereader *lr) ch = lr_getc (lr); } - value *= 256; - value += byte; - - ++nbytes; + bytes[nbytes++] = byte; } while (ch == lr->escape_char && nbytes < 4); @@ -335,23 +353,52 @@ get_toplvl_escape (struct linereader *lr) lr_ungetn (lr, 1); lr->token.tok = tok_charcode; - lr->token.val.charcode.val = value; lr->token.val.charcode.nbytes = nbytes; return &lr->token; } -#define ADDC(ch) \ - do \ - { \ - if (bufact == bufmax) \ - { \ - bufmax *= 2; \ - buf = xrealloc (buf, bufmax); \ - } \ - buf[bufact++] = (ch); \ - } \ +#define ADDC(ch) \ + do \ + { \ + if (bufact == bufmax) \ + { \ + bufmax *= 2; \ + buf = xrealloc (buf, bufmax); \ + } \ + buf[bufact++] = (ch); \ + } \ + while (0) + + +#define ADDS(s, l) \ + do \ + { \ + size_t _l = (l); \ + if (bufact + _l > bufmax) \ + { \ + if (bufact < _l) \ + bufact = _l; \ + bufmax *= 2; \ + buf = xrealloc (buf, bufmax); \ + } \ + 
memcpy (&buf[bufact], s, _l); \ + bufact += _l; \ + } \ + while (0) + + +#define ADDWC(ch) \ + do \ + { \ + if (buf2act == buf2max) \ + { \ + buf2max *= 2; \ + buf2 = xrealloc (buf2, buf2max * 4); \ + } \ + buf2[buf2act++] = (ch); \ + } \ while (0) @@ -399,9 +446,8 @@ get_symname (struct linereader *lr) if (cp == &buf[bufact - 1]) { /* Yes, it is. */ - lr->token.tok = bufact == 6 ? tok_ucs2 : tok_ucs4; - lr->token.val.charcode.val = strtoul (buf, NULL, 16); - lr->token.val.charcode.nbytes = lr->token.tok == tok_ucs2 ? 2 : 4; + lr->token.tok = tok_ucs4; + lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16); return &lr->token; } @@ -422,8 +468,8 @@ get_symname (struct linereader *lr) buf[bufact] = '\0'; buf = xrealloc (buf, bufact + 1); - lr->token.val.str.start = buf; - lr->token.val.str.len = bufact - 1; + lr->token.val.str.startmb = buf; + lr->token.val.str.lenmb = bufact - 1; } return &lr->token; @@ -446,8 +492,18 @@ get_ident (struct linereader *lr) while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';' && ch != '<' && ch != ',') - /* XXX Handle escape sequences? 
*/ - ADDC (ch); + { + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + { + lr_error (lr, _("invalid escape sequence")); + break; + } + } + ADDC (ch); + } lr_ungetn (lr, 1); @@ -465,8 +521,8 @@ get_ident (struct linereader *lr) buf[bufact] = '\0'; buf = xrealloc (buf, bufact + 1); - lr->token.val.str.start = buf; - lr->token.val.str.len = bufact; + lr->token.val.str.startmb = buf; + lr->token.val.str.lenmb = bufact; } return &lr->token; @@ -474,113 +530,247 @@ get_ident (struct linereader *lr) static struct token * -get_string (struct linereader *lr, const struct charset_t *charset) +get_string (struct linereader *lr, const struct charmap_t *charmap, + const struct repertoire_t *repertoire) { - int illegal_string = 0; - char *buf, *cp; + int return_widestr = lr->return_widestr; + char *buf; + char *buf2 = NULL; size_t bufact; size_t bufmax = 56; - int ch; + /* We must return two different strings. */ buf = xmalloc (bufmax); bufact = 0; - while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) - if (ch != '<' || charset == NULL) - { - if (ch == lr->escape_char) - { - ch = lr_getc (lr); - if (ch == '\n' || ch == EOF) - break; - } + /* We know it'll be a string. */ + lr->token.tok = tok_string; + + /* If we need not translate the strings (i.e., expand <...> parts) + we can run a simple loop. */ + if (!lr->translate_strings) + { + int ch; + + buf2 = NULL; + while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) ADDC (ch); - } - else - { - /* We have to get the value of the symbol. 
*/ - unsigned int value; - size_t startidx = bufact; - - if (!lr->translate_strings) - ADDC ('<'); - - while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) - { - if (ch == lr->escape_char) - { - ch = lr_getc (lr); - if (ch == '\n' || ch == EOF) - break; - } - ADDC (ch); - } - - if (ch == '\n' || ch == EOF) - lr_error (lr, _("unterminated string")); - else - if (!lr->translate_strings) - ADDC ('>'); - - if (lr->translate_strings) - { - value = charset_find_value (&charset->char_table, &buf[startidx], - bufact - startidx); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + + /* Catch errors with trailing escape character. */ + if (bufact > 0 && buf[bufact - 1] == lr->escape_char + && (bufact == 1 || buf[bufact - 2] != lr->escape_char)) + { + lr_error (lr, _("illegal escape sequence at end of string")); + --bufact; + } + else if (ch == '\n' || ch == EOF) + lr_error (lr, _("unterminated string")); + + ADDC ('\0'); + } + else + { + int illegal_string = 0; + size_t buf2act = 0; + size_t buf2max = 56 * sizeof (uint32_t); + int ch; + int warned = 0; + + /* We have to provide the wide character result as well. */ + if (return_widestr) + buf2 = xmalloc (buf2max); + + /* Read until the end of the string (or end of the line or file). */ + while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) + { + size_t startidx; + uint32_t wch; + struct charseq *seq; + + if (ch != '<') + { + /* The standards leave it up to the implementation to decide + what to do with character which stand for themself. We + could jump through hoops to find out the value relative to + the charmap and the repertoire map, but instead we leave + it up to the locale definition author to write a better + definition. We assume here that every character which + stands for itself is encoded using ISO 8859-1. Using the + escape character is allowed. 
*/ + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + break; + } + + if (verbose && !warned) + { + lr_error (lr, _("\ +non-symbolic character value should not be used")); + warned = 1; + } + + ADDC (ch); + if (return_widestr) + ADDWC ((uint32_t) ch); + + continue; + } + + /* Now we have to search for the end of the symbolic name, i.e., + the closing '>'. */ + startidx = bufact; + while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) + { + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + break; + } + ADDC (ch); + } + if (ch == '\n' || ch == EOF) + /* Not a correct string. */ + break; + if (bufact == startidx) + { + /* <> is no correct name. Ignore it and also signal an + error. */ illegal_string = 1; - bufact = startidx; + continue; + } - if (bufmax - bufact < 8) - { - bufmax *= 2; - buf = (char *) xrealloc (buf, bufmax); - } + /* It might be a Uxxxx symbol. */ + if (buf[startidx] == 'U' + && (bufact - startidx == 5 || bufact - startidx == 9)) + { + char *cp = buf + startidx + 1; + while (cp < &buf[bufact] && isxdigit (*cp)) + ++cp; + + if (cp == &buf[bufact]) + { + const char *symbol = NULL; + + /* Yes, it is. */ + ADDC ('\0'); + wch = strtoul (buf + startidx + 1, NULL, 16); + + /* Now forget about the name we just added. */ + bufact = startidx; + + if (return_widestr) + ADDWC (wch); + + /* Now determine from the repertoire the name of the + character and find it in the charmap. */ + if (repertoire != NULL) + symbol = repertoire_find_symbol (repertoire, wch); + + if (symbol == NULL) + { + /* We cannot generate a string since we cannot map + from the Unicode number to the character symbol. */ + lr_error (lr, + _("character <U%0*X> not in repertoire map"), + wch > 0xffff ? 8 : 4, wch); + + illegal_string = 1; + } + else + { + seq = charmap_find_value (charmap, symbol, + strlen (symbol)); + + if (seq == NULL) + { + /* Not a known name. 
*/ + lr_error (lr, + _("symbol `%s' not in charmap"), symbol); + illegal_string = 1; + } + else + ADDS (seq->bytes, seq->nbytes); + } + + continue; + } + } + + if (return_widestr) + { + /* We now have the symbolic name in buf[startidx] to + buf[bufact-1]. Now find out the value for this + character in the repertoire map as well as in the + charmap (in this order). */ + wch = repertoire_find_value (repertoire, &buf[startidx], + bufact - startidx); + if (wch == ILLEGAL_CHAR_VALUE) + { + /* This name is not in the repertoire map. */ + lr_error (lr, _("symbol `%.*s' not in repertoire map"), + bufact - startidx, &buf[startidx]); + illegal_string = 1; + } + else + ADDWC (wch); + } + + /* Now the same for the multibyte representation. */ + seq = charmap_find_value (charmap, &buf[startidx], + bufact - startidx); - cp = &buf[bufact]; - if (encode_char (value, &cp)) + if (seq == NULL) + { + /* This name is not in the charmap. */ + lr_error (lr, _("symbol `%.*s' not in charmap"), + bufact - startidx, &buf[startidx]); illegal_string = 1; - bufact = cp - buf; - } - } + /* Now forget about the name we just added. */ + bufact = startidx; + } + else + { + /* Now forget about the name we just added. */ + bufact = startidx; - /* Catch errors with trailing escape character. */ - if (bufact > 0 && buf[bufact - 1] == lr->escape_char - && (bufact == 1 || buf[bufact - 2] != lr->escape_char)) - { - lr_error (lr, _("illegal escape sequence at end of string")); - --bufact; - } - else if (ch == '\n' || ch == EOF) - lr_error (lr, _("unterminated string")); + ADDS (seq->bytes, seq->nbytes); + } + } - /* Terminate string if necessary. 
*/ - if (lr->translate_strings) - { - cp = &buf[bufact]; - if (encode_char (0, &cp)) - illegal_string = 1; + if (ch == '\n' || ch == EOF) + { + lr_error (lr, _("unterminated string")); + illegal_string = 1; + } - bufact = cp - buf; - } - else - ADDC ('\0'); + if (illegal_string) + { + free (buf); + if (buf2 != NULL) + free (buf2); + lr->token.val.str.startmb = NULL; + lr->token.val.str.lenmb = 0; - lr->token.tok = tok_string; + return &lr->token; + } - if (illegal_string) - { - free (buf); - lr->token.val.str.start = NULL; - lr->token.val.str.len = 0; - } - else - { - buf = xrealloc (buf, bufact + 1); + ADDC ('\0'); - lr->token.val.str.start = buf; - lr->token.val.str.len = bufact; + if (return_widestr) + { + ADDWC (0); + lr->token.val.str.startwc = xrealloc (buf2, + buf2act * sizeof (uint32_t)); + lr->token.val.str.lenwc = buf2act; + } } + lr->token.val.str.startmb = xrealloc (buf, bufact); + lr->token.val.str.lenmb = bufact; + return &lr->token; } diff --git a/locale/programs/linereader.h b/locale/programs/linereader.h index 6f81b81..1c98f68 100644 --- a/locale/programs/linereader.h +++ b/locale/programs/linereader.h @@ -1,6 +1,6 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + Contributed by Ulrich Drepper, <drepper@gnu.org>. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -22,13 +22,16 @@ #include <ctype.h> #include <libintl.h> +#include <stdint.h> #include <stdio.h> +#include "charmap.h" #include "error.h" #include "locfile-token.h" +#include "repertoire.h" -typedef const struct keyword_t *(*kw_hash_fct_t) (const char *, int); +typedef const struct keyword_t *(*kw_hash_fct_t) (const char *, unsigned int); struct charset_t; @@ -39,15 +42,20 @@ struct token { struct { - char *start; - size_t len; + char *startmb; + size_t lenmb; + uint32_t *startwc; + size_t lenwc; } str; unsigned long int num; struct { - unsigned int val; + /* This element is sized on the safe expectation that no single + character in any character set uses more then 16 bytes. */ + unsigned char bytes[16]; int nbytes; } charcode; + uint32_t ucs4; } val; }; @@ -69,18 +77,20 @@ struct linereader struct token token; int translate_strings; + int return_widestr; kw_hash_fct_t hash_fct; }; /* Functions defined in linereader.c. */ -struct linereader *lr_open (const char *fname, kw_hash_fct_t hf); -int lr_eof (struct linereader *lr); -void lr_close (struct linereader *lr); -int lr_next (struct linereader *lr); -struct token *lr_token (struct linereader *lr, - const struct charset_t *charset); +extern struct linereader *lr_open (const char *fname, kw_hash_fct_t hf); +extern int lr_eof (struct linereader *lr); +extern void lr_close (struct linereader *lr); +extern int lr_next (struct linereader *lr); +extern struct token *lr_token (struct linereader *lr, + const struct charmap_t *charmap, + const struct repertoire_t *repertoire); #define lr_error(lr, fmt, args...) 
\ diff --git a/locale/programs/locale-spec.c b/locale/programs/locale-spec.c index 9ba49f0..368306c 100644 --- a/locale/programs/locale-spec.c +++ b/locale/programs/locale-spec.c @@ -109,7 +109,7 @@ locale_special (const char *name, int show_category_name, { printf ("%s<%s>", first ? "" : ",", &__collate_symbol_strings[__collate_symbol_hash[2 * cnt]]); -#if 1 +#if 0 { size_t idx = __collate_symbol_hash[2 * cnt + 1]; size_t cls; diff --git a/locale/programs/locale.c b/locale/programs/locale.c index 9d6a931..775500e 100644 --- a/locale/programs/locale.c +++ b/locale/programs/locale.c @@ -1,7 +1,7 @@ /* Implementation of the locale program according to POSIX 9945-2. - Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -129,7 +129,7 @@ struct category /* We have all categories defined in `categories.def'. Now construct the description and data structure used for all categories. */ #define DEFINE_ELEMENT(Item, More...) 
{ Item, ## More }, -#define DEFINE_CATEGORY(category, name, items, postload, in, check, out) \ +#define DEFINE_CATEGORY(category, name, items, postload) \ static struct cat_item category##_desc[] = \ { \ NO_PAREN items \ @@ -140,7 +140,7 @@ struct category static struct category category[] = { -#define DEFINE_CATEGORY(category, name, items, postload, in, check, out) \ +#define DEFINE_CATEGORY(category, name, items, postload) \ [category] = { _NL_NUM_##category, name, NELEMS (category##_desc), \ category##_desc }, #include "categories.def" diff --git a/locale/programs/localedef.c b/locale/programs/localedef.c index 69e9dc5..5eadbf3 100644 --- a/locale/programs/localedef.c +++ b/locale/programs/localedef.c @@ -1,6 +1,6 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -30,26 +30,16 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#ifdef _POSIX2_LOCALEDEF -# include <sys/mman.h> -#endif +#include <sys/mman.h> #include <sys/stat.h> #include "error.h" -#include "charset.h" +#include "charmap.h" #include "locfile.h" -#include "locales.h" - -/* This is a special entry of the copylist. For all categories we don't - have a definition we use the data for the POSIX locale. */ -struct copy_def_list_t copy_posix = -{ - next: NULL, - name: "POSIX", - mask: (1 << LC_ALL) - 1, - locale: NULL -}; +/* Undefine the following line in the production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> /* List of copied locales. */ @@ -74,7 +64,10 @@ static const char *charmap_file; static const char *input_file; /* Name of the repertoire map file. */ -const char *repertoiremap; +const char *repertoire_global; + +/* List of all locales. 
*/ +static struct localedef_t *locales; /* Name and version of program. */ @@ -88,11 +81,10 @@ void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; static const struct argp_option options[] = { { NULL, 0, NULL, 0, N_("Input Files:") }, - { "charmap", 'f', N_("FILE"), 0, + { "charmap", 'f', "FILE", 0, N_("Symbolic character names defined in FILE") }, - { "inputfile", 'i', N_("FILE"), 0, - N_("Source definitions are found in FILE") }, - { "repertoire-map", 'u', N_("FILE"), 0, + { "inputfile", 'i', "FILE", 0, N_("Source definitions are found in FILE") }, + { "repertoire-map", 'u', "FILE", 0, N_("FILE contains mapping from symbolic names to UCS4 values") }, { NULL, 0, NULL, 0, N_("Output control:") }, @@ -125,7 +117,7 @@ static struct argp argp = /* Prototypes for global functions. */ -void *xmalloc (size_t __n); +extern void *xmalloc (size_t __n); /* Prototypes for local functions. */ static void error_print (void); @@ -138,12 +130,12 @@ main (int argc, char *argv[]) { const char *output_path; int cannot_write_why; - struct charset_t *charset; - struct localedef_t *localedef; - struct copy_def_list_t *act_add_locdef; + struct charmap_t *charmap; + struct localedef_t global; int remaining; /* Set initial values for global variables. */ + copy_list = NULL; posix_conformance = getenv ("POSIXLY_CORRECT") != NULL; error_print_progname = error_print; @@ -187,135 +179,37 @@ main (int argc, char *argv[]) error (3, 0, _("FATAL: system does not define `_POSIX2_LOCALEDEF'")); /* Process charmap file. */ - charset = charmap_read (charmap_file); + charmap = charmap_read (charmap_file); + + /* Add the first entry in the locale list. */ + memset (&global, '\0', sizeof (struct localedef_t)); + global.name = input_file; + global.needed = ALL_LOCALES; + locales = &global; /* Now read the locale file. 
*/ - localedef = locfile_read (input_file, charset); - if (localedef->failed != 0) + if (locfile_read (&global, charmap) != 0) error (4, errno, _("cannot open locale definition file `%s'"), input_file); - /* Make sure all categories are defined. */ - copy_posix.next = copy_list; - copy_list = ©_posix; - - /* Perhaps we saw some `copy' instructions. Process the given list. - We use a very simple algorithm: we look up the list from the - beginning every time. */ - do + /* Perhaps we saw some `copy' instructions. */ + while (1) { - int cat = 0; + struct localedef_t *runp = locales; - for (act_add_locdef = copy_list; act_add_locdef != NULL; - act_add_locdef = act_add_locdef->next) - { - for (cat = LC_CTYPE; cat <= LC_MESSAGES; ++cat) - if ((act_add_locdef->mask & (1 << cat)) != 0) - { - act_add_locdef->mask &= ~(1 << cat); - break; - } - if (cat <= LC_MESSAGES) - break; - } + while (runp != NULL && runp->needed == runp->avail) + runp = runp->next; - if (act_add_locdef != NULL) - { - int avail = 0; - - if (act_add_locdef->locale == NULL) - { - /* Saving the mask is an ugly trick to prevent the reader - from modifying `copy_posix' if we currently process it. */ - int save_mask = act_add_locdef->mask; - act_add_locdef->locale = locfile_read (act_add_locdef->name, - charset); - act_add_locdef->mask = save_mask; - } - - if (! act_add_locdef->locale->failed) - { - avail = act_add_locdef->locale->categories[cat].generic != NULL; - if (avail) - { - localedef->categories[cat].generic - = act_add_locdef->locale->categories[cat].generic; - localedef->avail |= 1 << cat; - } - } - - if (! 
avail) - { - static const char *locale_names[] = - { - "LC_COLLATE", "LC_CTYPE", "LC_MONETARY", - "LC_NUMERIC", "LC_TIME", "LC_MESSAGES" - }; - char *fname; - int fd; - struct stat st; - - asprintf (&fname, LOCALEDIR "/%s/%s", act_add_locdef->name, - locale_names[cat]); - fd = open (fname, O_RDONLY); - if (fd == -1) - { - free (fname); - - asprintf (&fname, LOCALEDIR "/%s/%s/SYS_%s", - act_add_locdef->name, locale_names[cat], - locale_names[cat]); - - fd = open (fname, O_RDONLY); - if (fd == -1) - error (5, 0, _("\ -locale file `%s', used in `copy' statement, not found"), - act_add_locdef->name); - } - - if (fstat (fd, &st) < 0) - error (5, errno, _("\ -cannot `stat' locale file `%s'"), - fname); - - localedef->len[cat] = st.st_size; -#ifdef _POSIX_MAPPED_FILES - localedef->categories[cat].generic - = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - - if (localedef->categories[cat].generic == MAP_FAILED) -#endif /* _POSIX_MAPPED_FILES */ - { - size_t left = st.st_size; - void *read_ptr; - - localedef->categories[cat].generic - = xmalloc (st.st_size); - read_ptr = localedef->categories[cat].generic; - - do - { - long int n; - n = read (fd, read_ptr, left); - if (n == -1) - error (5, errno, _("cannot read locale file `%s'"), - fname); - read_ptr += n; - left -= n; - } - while (left > 0); - } - - close (fd); - free (fname); - - localedef->binary |= 1 << cat; - } - } + if (runp == NULL) + /* Everything read. */ + break; + + if (locfile_read (runp, charmap) != 0) + error (4, errno, _("cannot open locale definition file `%s'"), + runp->name); } - while (act_add_locdef != NULL); /* Check the categories we processed in source form. */ - check_all_categories (localedef, charset); + check_all_categories (locales, charmap); /* We are now able to write the data files. If warning were given we do it only if it is explicitly requested (--force). 
*/ @@ -325,7 +219,7 @@ cannot `stat' locale file `%s'"), error (4, cannot_write_why, _("cannot write output files to `%s'"), output_path); else - write_all_categories (localedef, charset, output_path); + write_all_categories (locales, charmap, output_path); } else error (4, 0, _("no output file produced because warning were issued")); @@ -357,7 +251,7 @@ parse_opt (int key, char *arg, struct argp_state *state) input_file = arg; break; case 'u': - repertoiremap = arg; + repertoire_global = arg; break; case 'v': verbose = 1; @@ -406,50 +300,11 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ } -void -def_to_process (const char *name, int category) -{ - struct copy_def_list_t *new, **rp; - - for (rp = ©_list; *rp != NULL; rp = &(*rp)->next) - if (strcmp (name, (*rp)->name) == 0) - break; - - if (*rp == NULL) - { - size_t cnt; - - *rp = (struct copy_def_list_t *) xmalloc (sizeof (**rp)); - - (*rp)->next = NULL; - (*rp)->name = name; - (*rp)->mask = 0; - (*rp)->locale = NULL; - - for (cnt = 0; cnt < 6; ++cnt) - { - (*rp)->binary[cnt].data = NULL; - (*rp)->binary[cnt].len = 0; - } - } - new = *rp; - - if ((new->mask & category) != 0) - /* We already have the information. This cannot happen. */ - error (5, 0, _("\ -category data requested more than once: should not happen")); - - new->mask |= category; -} - - /* The address of this function will be assigned to the hook in the error functions. */ static void -error_print () +error_print (void) { - /* We don't want the program name to be printed in messages. Emacs' - compile.el does not like this. */ } @@ -461,13 +316,15 @@ construct_output_path (char *path) { const char *normal = NULL; char *result; + char *endp; if (strchr (path, '/') == NULL) { /* This is a system path. First examine whether the locale name contains a reference to the codeset. This should be normalized. 
*/ - char *startp, *endp; + char *startp; + size_t n; startp = path; /* We must be prepared for finding a CEN name or a location of @@ -493,17 +350,20 @@ construct_output_path (char *path) the end of the function we need another byte for the trailing '/'. */ if (normal == NULL) - asprintf (&result, "%s/%s%c", LOCALEDIR, path, '\0'); + n = asprintf (&result, "%s/%s%c", LOCALEDIR, path, '\0'); else - asprintf (&result, "%s/%.*s%s%s%c", LOCALEDIR, startp - path, path, - normal, endp, '\0'); + n = asprintf (&result, "%s/%.*s%s%s%c", LOCALEDIR, startp - path, path, + normal, endp, '\0'); + + endp = result + n; } else { /* This is a user path. Please note the additional byte in the memory allocation. */ - result = xmalloc (strlen (path) + 2); - strcpy (result, path); + size_t len = strlen (path) + 1; + result = xmalloc (len + 1); + endp = mempcpy (result, path, len); } errno = 0; @@ -516,11 +376,13 @@ construct_output_path (char *path) mkdir (result, 0777); } - strcat (result, "/"); + *endp++ = '/'; + *endp = '\0'; return result; } + /* Normalize codeset name. There is no standard for the codeset names. Normalization allows the user to use any of the common names. */ @@ -555,7 +417,7 @@ normalize_codeset (codeset, name_len) for (cnt = 0; cnt < name_len; ++cnt) if (isalpha (codeset[cnt])) - *wp++ = _tolower (codeset[cnt]); + *wp++ = tolower (codeset[cnt]); else if (isdigit (codeset[cnt])) *wp++ = codeset[cnt]; @@ -564,3 +426,52 @@ normalize_codeset (codeset, name_len) return (const char *) retval; } + + +struct localedef_t * +add_to_readlist (int locale, const char *name, const char *repertoire_name) +{ + struct localedef_t *runp = locales; + + while (runp != NULL && strcmp (name, runp->name) != 0) + runp = runp->next; + + if (runp == NULL) + { + /* Add a new entry at the end. 
*/ + struct localedef_t *newp = xcalloc (1, sizeof (struct localedef_t)); + newp->name = name; + newp->repertoire_name = repertoire_name; + + if (locales == NULL) + runp = locales = newp; + else + { + runp = locales; + while (runp->next != NULL) + runp = runp->next; + runp = runp->next = newp; + } + } + + if ((runp->needed & (1 << locale)) != 0) + error (5, 0, _("circular dependencies between locale definitions")); + + runp->needed |= 1 << locale; + + return runp; +} + + +struct localedef_t * +find_locale (int locale, const char *name, const char *repertoire_name, + struct charmap_t *charmap) +{ + struct localedef_t *result = add_to_readlist (locale, name, repertoire_name); + + if (locfile_read (result, charmap) != 0) + error (4, errno, _("cannot open locale definition file `%s'"), + result->name); + + return result; +} diff --git a/locale/programs/localedef.h b/locale/programs/localedef.h new file mode 100644 index 0000000..075cf89 --- /dev/null +++ b/locale/programs/localedef.h @@ -0,0 +1,131 @@ +/* General definitions for localedef(1). + Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +#ifndef _LOCALEDEF_H +#define _LOCALEDEF_H 1 + +/* Get the basic locale definitions. */ +#include <locale.h> +#include <stddef.h> + +#include "repertoire.h" + + +/* We need a bitmask for the locales. */ +enum +{ + CTYPE_LOCALE = 1 << LC_CTYPE, + NUMERIC_LOCALE = 1 << LC_NUMERIC, + TIME_LOCALE = 1 << LC_TIME, + COLLATE_LOCALE = 1 << LC_COLLATE, + MONETARY_LOCALE = 1 << LC_MONETARY, + MESSAGES_LOCALE = 1 << LC_MESSAGES, + PAPER_LOCALE = 1 << LC_PAPER, + NAME_LOCALE = 1 << LC_NAME, + ADDRESS_LOCALE = 1 << LC_ADDRESS, + TELEPHONE_LOCALE = 1 << LC_TELEPHONE, + MEASUREMENT_LOCALE = 1 << LC_MEASUREMENT, + IDENTIFICATION_LOCALE = 1 << LC_IDENTIFICATION, + ALL_LOCALES = (1 << LC_CTYPE + | 1 << LC_NUMERIC + | 1 << LC_TIME + | 1 << LC_COLLATE + | 1 << LC_MONETARY + | 1 << LC_MESSAGES + | 1 << LC_PAPER + | 1 << LC_NAME + | 1 << LC_ADDRESS + | 1 << LC_TELEPHONE + | 1 << LC_MEASUREMENT + | 1 << LC_IDENTIFICATION) +}; + + +/* Opaque types for the different locales. */ +struct locale_ctype_t; +struct locale_collate_t; +struct locale_monetary_t; +struct locale_numeric_t; +struct locale_time_t; +struct locale_messages_t; +struct locale_paper_t; +struct locale_name_t; +struct locale_address_t; +struct locale_telephone_t; +struct locale_measurement_t; +struct locale_identification_t; + + +/* Definitions for the locale. 
*/ +struct localedef_t +{ + struct localedef_t *next; + + const char *name; + + int needed; + int avail; + + union + { + void *generic; + struct locale_ctype_t *ctype; + struct locale_collate_t *collate; + struct locale_monetary_t *monetary; + struct locale_numeric_t *numeric; + struct locale_time_t *time; + struct locale_messages_t *messages; + struct locale_paper_t *paper; + struct locale_name_t *name; + struct locale_address_t *address; + struct locale_telephone_t *telephone; + struct locale_measurement_t *measurement; + struct locale_identification_t *identification; + } categories[12]; + + size_t len[12]; + + const char *repertoire_name; +}; + + +/* Global variables of the localedef program. */ +extern int verbose; +extern int be_quiet; +extern const char *repertoire_global; + + +/* Prototypes for a few program-wide used functions. */ +extern void *xmalloc (size_t __n); +extern void *xcalloc (size_t __n, size_t __size); +extern void *xrealloc (void *__p, size_t __n); +extern char *xstrdup (const char *__str); + + +/* Mark given locale as to be read. */ +extern struct localedef_t *add_to_readlist (int locale, const char *name, + const char *repertoire_name); + +/* Find the information for the locale NAME. */ +extern struct localedef_t *find_locale (int locale, const char *name, + const char *repertoire_name, + struct charmap_t *charmap); + +#endif /* localedef.h */ diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf index 991e9dd..91c2089 100644 --- a/locale/programs/locfile-kw.gperf +++ b/locale/programs/locfile-kw.gperf @@ -1,7 +1,7 @@ %{ -/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -24,77 +24,175 @@ %} struct keyword_t ; %% -escape_char, tok_escape_char, 0 -comment_char, tok_comment_char, 0 -repertoiremap, tok_repertoiremap, 0 -LC_CTYPE, tok_lc_ctype, 0 -END, tok_end, 0 -copy, tok_copy, 0 -upper, tok_upper, 0 -lower, tok_lower, 0 -alpha, tok_alpha, 0 -digit, tok_digit, 0 -alnum, tok_alnum, 0 -space, tok_space, 0 -cntrl, tok_cntrl, 0 -punct, tok_punct, 0 -graph, tok_graph, 0 -print, tok_print, 0 -xdigit, tok_xdigit, 0 -blank, tok_blank, 0 -charclass, tok_charclass, 0 -charconv, tok_charconv, 0 -toupper, tok_toupper, 0 -tolower, tok_tolower, 0 -LC_COLLATE, tok_lc_collate, 0 -collating-element, tok_collating_element, 0 -collating-symbol, tok_collating_symbol, 0 -order_start, tok_order_start, 0 -order_end, tok_order_end, 0 -from, tok_from, 0 -forward, tok_forward, 0 -backward, tok_backward, 0 -position, tok_position, 0 -UNDEFINED, tok_undefined, 0 -IGNORE, tok_ignore, 0 -LC_MONETARY, tok_lc_monetary, 0 -int_curr_symbol, tok_int_curr_symbol, 0 -currency_symbol, tok_currency_symbol, 0 -mon_decimal_point, tok_mon_decimal_point, 0 -mon_thousands_sep, tok_mon_thousands_sep, 0 -mon_grouping, tok_mon_grouping, 0 -positive_sign, tok_positive_sign, 0 -negative_sign, tok_negative_sign, 0 -int_frac_digits, tok_int_frac_digits, 0 -frac_digits, tok_frac_digits, 0 -p_cs_precedes, tok_p_cs_precedes, 0 -p_sep_by_space, tok_p_sep_by_space, 0 -n_cs_precedes, tok_n_cs_precedes, 0 -n_sep_by_space, tok_n_sep_by_space, 0 -p_sign_posn, tok_p_sign_posn, 0 -n_sign_posn, tok_n_sign_posn, 0 -LC_NUMERIC, tok_lc_numeric, 0 -decimal_point, tok_decimal_point, 0 -thousands_sep, tok_thousands_sep, 0 -grouping, tok_grouping, 0 -LC_TIME, tok_lc_time, 0 -abday, tok_abday, 0 -day, tok_day, 0 -abmon, tok_abmon, 0 -mon, tok_mon, 0 -d_t_fmt, tok_d_t_fmt, 0 -d_fmt, tok_d_fmt, 0 -t_fmt, tok_t_fmt, 0 -am_pm, tok_am_pm, 0 -t_fmt_ampm, tok_t_fmt_ampm, 
0 -era, tok_era, 0 -era_year, tok_era_year, 0 -era_d_fmt, tok_era_d_fmt, 0 -era_d_t_fmt, tok_era_d_t_fmt, 0 -era_t_fmt, tok_era_t_fmt, 0 -alt_digits, tok_alt_digits, 0 -LC_MESSAGES, tok_lc_messages, 0 -yesexpr, tok_yesexpr, 0 -noexpr, tok_noexpr, 0 -yesstr, tok_yesstr, 0 -nostr, tok_nostr, 0 +escape_char, tok_escape_char, 0 +comment_char, tok_comment_char, 0 +repertoiremap, tok_repertoiremap, 0 +include, tok_include, 0 +LC_CTYPE, tok_lc_ctype, 0 +END, tok_end, 0 +copy, tok_copy, 0 +upper, tok_upper, 0 +lower, tok_lower, 0 +alpha, tok_alpha, 0 +digit, tok_digit, 0 +outdigit, tok_outdigit, 0 +alnum, tok_alnum, 0 +space, tok_space, 0 +cntrl, tok_cntrl, 0 +punct, tok_punct, 0 +graph, tok_graph, 0 +print, tok_print, 0 +xdigit, tok_xdigit, 0 +blank, tok_blank, 0 +charclass, tok_charclass, 0 +class, tok_class, 0 +charconv, tok_charconv, 0 +toupper, tok_toupper, 0 +tolower, tok_tolower, 0 +map, tok_map, 0 +translit_start, tok_translit_start, 0 +translit_end, tok_translit_end, 0 +default_missing, tok_default_missing, 0 +LC_COLLATE, tok_lc_collate, 0 +coll_weight_max, tok_coll_weight_max, 0 +section-symbol, tok_section_symbol, 0 +collating-element, tok_collating_element, 0 +collating-symbol, tok_collating_symbol, 0 +symbol-equivalence, tok_symbol_equivalence, 0 +order_start, tok_order_start, 0 +order_end, tok_order_end, 0 +from, tok_from, 0 +forward, tok_forward, 0 +backward, tok_backward, 0 +position, tok_position, 0 +UNDEFINED, tok_undefined, 0 +IGNORE, tok_ignore, 0 +reorder-after, tok_reorder_after, 0 +reorder-end, tok_reorder_end, 0 +reorder-sections-after, tok_reorder_sections_after, 0 +reorder-sections-end, tok_reorder_sections_end, 0 +define, tok_define, 0 +undef, tok_undef, 0 +ifdef, tok_ifdef, 0 +else, tok_else, 0 +elif, tok_elif, 0 +endif, tok_endif, 0 +LC_MONETARY, tok_lc_monetary, 0 +int_curr_symbol, tok_int_curr_symbol, 0 +currency_symbol, tok_currency_symbol, 0 +mon_decimal_point, tok_mon_decimal_point, 0 +mon_thousands_sep, tok_mon_thousands_sep, 0 
+mon_grouping, tok_mon_grouping, 0 +positive_sign, tok_positive_sign, 0 +negative_sign, tok_negative_sign, 0 +int_frac_digits, tok_int_frac_digits, 0 +frac_digits, tok_frac_digits, 0 +p_cs_precedes, tok_p_cs_precedes, 0 +p_sep_by_space, tok_p_sep_by_space, 0 +n_cs_precedes, tok_n_cs_precedes, 0 +n_sep_by_space, tok_n_sep_by_space, 0 +p_sign_posn, tok_p_sign_posn, 0 +n_sign_posn, tok_n_sign_posn, 0 +int_p_cs_precedes, tok_int_p_cs_precedes, 0 +int_p_sep_by_space, tok_int_p_sep_by_space, 0 +int_n_cs_precedes, tok_int_n_cs_precedes, 0 +int_n_sep_by_space, tok_int_n_sep_by_space, 0 +int_p_sign_posn, tok_int_p_sign_posn, 0 +int_n_sign_posn, tok_int_n_sign_posn, 0 +duo_int_curr_symbol, tok_duo_int_curr_symbol, 0 +duo_currency_symbol, tok_duo_currency_symbol, 0 +duo_int_frac_digits, tok_duo_int_frac_digits, 0 +duo_frac_digits, tok_duo_frac_digits, 0 +duo_p_cs_precedes, tok_duo_p_cs_precedes, 0 +duo_p_sep_by_space, tok_duo_p_sep_by_space, 0 +duo_n_cs_precedes, tok_duo_n_cs_precedes, 0 +duo_n_sep_by_space, tok_duo_n_sep_by_space, 0 +duo_int_p_cs_precedes, tok_duo_int_p_cs_precedes, 0 +duo_int_p_sep_by_space, tok_duo_int_p_sep_by_space, 0 +duo_int_n_cs_precedes, tok_duo_int_n_cs_precedes, 0 +duo_int_n_sep_by_space, tok_duo_int_n_sep_by_space, 0 +duo_p_sign_posn, tok_duo_p_sign_posn, 0 +duo_n_sign_posn, tok_duo_n_sign_posn, 0 +duo_int_p_sign_posn, tok_duo_int_p_sign_posn, 0 +duo_int_n_sign_posn, tok_duo_int_n_sign_posn, 0 +uno_valid_from, tok_uno_valid_from, 0 +uno_valid_to, tok_uno_valid_to, 0 +duo_valid_from, tok_duo_valid_from, 0 +duo_valid_to, tok_duo_valid_to, 0 +conversion_rate, tok_conversion_rate, 0 +LC_NUMERIC, tok_lc_numeric, 0 +decimal_point, tok_decimal_point, 0 +thousands_sep, tok_thousands_sep, 0 +grouping, tok_grouping, 0 +LC_TIME, tok_lc_time, 0 +abday, tok_abday, 0 +day, tok_day, 0 +week, tok_week, 0 +abmon, tok_abmon, 0 +mon, tok_mon, 0 +d_t_fmt, tok_d_t_fmt, 0 +d_fmt, tok_d_fmt, 0 +t_fmt, tok_t_fmt, 0 +am_pm, tok_am_pm, 0 +t_fmt_ampm, tok_t_fmt_ampm, 0 
+era, tok_era, 0 +era_year, tok_era_year, 0 +era_d_fmt, tok_era_d_fmt, 0 +era_d_t_fmt, tok_era_d_t_fmt, 0 +era_t_fmt, tok_era_t_fmt, 0 +alt_digits, tok_alt_digits, 0 +first_weekday, tok_first_weekday, 0 +first_workday, tok_first_workday, 0 +cal_direction, tok_cal_direction, 0 +timezone, tok_timezone, 0 +LC_MESSAGES, tok_lc_messages, 0 +yesexpr, tok_yesexpr, 0 +noexpr, tok_noexpr, 0 +yesstr, tok_yesstr, 0 +nostr, tok_nostr, 0 +LC_PAPER, tok_lc_paper, 0 +height, tok_height, 0 +width, tok_width, 0 +LC_NAME, tok_lc_name, 0 +name_fmt, tok_name_fmt, 0 +name_gen, tok_name_gen, 0 +name_mr, tok_name_mr, 0 +name_mrs, tok_name_mrs, 0 +name_miss, tok_name_miss, 0 +name_ms, tok_name_ms, 0 +LC_ADDRESS, tok_lc_address, 0 +postal_fmt, tok_postal_fmt, 0 +country_name, tok_country_name, 0 +country_post, tok_country_post, 0 +country_ab2, tok_country_ab2, 0 +country_ab3, tok_country_ab3, 0 +country_num, tok_country_num, 0 +country_car, tok_country_car, 0 +country_isbn, tok_country_isbn, 0 +lang_name, tok_lang_name, 0 +lang_ab, tok_lang_ab, 0 +lang_term, tok_lang_term, 0 +lang_lib, tok_lang_lib, 0 +LC_TELEPHONE, tok_lc_telephone, 0 +tel_int_fmt, tok_tel_int_fmt, 0 +tel_dom_fmt, tok_tel_dom_fmt, 0 +int_select, tok_int_select, 0 +int_prefix, tok_int_prefix, 0 +LC_MEASUREMENT, tok_lc_measurement, 0 +measurement, tok_measurement, 0 +LC_IDENTIFICATION, tok_lc_identification, 0 +title, tok_title, 0 +source, tok_source, 0 +address, tok_address, 0 +contact, tok_contact, 0 +email, tok_email, 0 +tel, tok_tel, 0 +fax, tok_fax, 0 +language, tok_language, 0 +territory, tok_territory, 0 +audience, tok_audience, 0 +application, tok_application, 0 +abbreviation, tok_abbreviation, 0 +revision, tok_revision, 0 +date, tok_date, 0 +category, tok_category, 0 diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h index bd80618..811234b 100644 --- a/locale/programs/locfile-kw.h +++ b/locale/programs/locfile-kw.h @@ -1,8 +1,8 @@ -/* C code produced by gperf version 2.5 (GNU C++ version) */ 
-/* Command-line: gperf -acCgopt -k1,2,5,$ -N locfile_hash programs/locfile-kw.gperf */ -/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. +/* ANSI-C code produced by gperf version 2.7.1 (19981006 egcs) */ +/* Command-line: gperf -acCgopt -k1,2,5,9,$ -L ANSI-C -N locfile_hash programs/locfile-kw.gperf */ +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -24,174 +24,368 @@ #include "locfile-token.h" struct keyword_t ; -#define TOTAL_KEYWORDS 74 +#define TOTAL_KEYWORDS 172 #define MIN_WORD_LENGTH 3 -#define MAX_WORD_LENGTH 17 +#define MAX_WORD_LENGTH 22 #define MIN_HASH_VALUE 3 -#define MAX_HASH_VALUE 178 -/* maximum key range = 176, duplicates = 0 */ +#define MAX_HASH_VALUE 545 +/* maximum key range = 543, duplicates = 0 */ #ifdef __GNUC__ -inline +__inline #endif static unsigned int -hash (register const char *str, register int len) +hash (register const char *str, register unsigned int len) { - static const unsigned char asso_values[] = + static const unsigned short asso_values[] = { - 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, - 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, - 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, - 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, - 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, - 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, - 179, 179, 179, 179, 179, 179, 179, 0, 0, 0, - 0, 0, 179, 0, 179, 179, 0, 179, 0, 45, - 179, 179, 0, 0, 0, 5, 179, 179, 179, 10, - 179, 179, 179, 179, 179, 5, 179, 0, 5, 0, - 15, 20, 5, 20, 40, 20, 179, 25, 15, 50, - 10, 0, 0, 179, 45, 50, 0, 30, 0, 5, - 10, 60, 179, 179, 179, 179, 179, 179, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 
546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 5, 0, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 20, 546, 0, 0, 0, + 5, 30, 0, 0, 546, 546, 0, 546, 0, 0, + 546, 546, 10, 0, 5, 10, 546, 546, 546, 0, + 546, 546, 546, 546, 546, 30, 546, 0, 10, 125, + 5, 0, 105, 30, 5, 95, 546, 0, 105, 155, + 135, 50, 75, 0, 5, 45, 0, 55, 0, 30, + 25, 25, 10, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546 }; register int hval = len; switch (hval) { default: + case 9: + hval += asso_values[(unsigned char)str[8]]; + case 8: + case 7: + case 6: case 5: - hval += asso_values[str[4]]; + hval += asso_values[(unsigned char)str[4]]; case 4: case 3: case 2: - hval += asso_values[str[1]]; + hval += asso_values[(unsigned char)str[1]]; case 1: - hval += asso_values[str[0]]; + hval += asso_values[(unsigned char)str[0]]; break; } - return hval + asso_values[str[len - 1]]; + return hval + asso_values[(unsigned char)str[len - 1]]; } #ifdef __GNUC__ -inline +__inline #endif const struct keyword_t * -locfile_hash (register const char *str, register int len) +locfile_hash (register const char *str, register unsigned int len) { static const struct keyword_t wordlist[] = { - {"",}, {"",}, {"",}, 
- {"END", tok_end, 0}, - {"",}, {"",}, - {"IGNORE", tok_ignore, 0}, - {"LC_TIME", tok_lc_time, 0}, - {"LC_CTYPE", tok_lc_ctype, 0}, - {"",}, - {"t_fmt", tok_t_fmt, 0}, - {"LC_MESSAGES", tok_lc_messages, 0}, - {"",}, {"",}, - {"UNDEFINED", tok_undefined, 0}, - {"LC_NUMERIC", tok_lc_numeric, 0}, - {"",}, - {"collating-element", tok_collating_element, 0}, - {"position", tok_position, 0}, - {"",}, - {"alpha", tok_alpha, 0}, - {"",}, {"",}, - {"positive_sign", tok_positive_sign, 0}, - {"",}, - {"d_fmt", tok_d_fmt, 0}, - {"",}, - {"forward", tok_forward, 0}, - {"",}, {"",}, - {"abmon", tok_abmon, 0}, - {"collating-symbol", tok_collating_symbol, 0}, - {"d_t_fmt", tok_d_t_fmt, 0}, - {"backward", tok_backward, 0}, - {"",}, - {"punct", tok_punct, 0}, - {"",}, {"",}, {"",}, - {"p_sep_by_space", tok_p_sep_by_space, 0}, - {"digit", tok_digit, 0}, - {"",}, {"",}, {"",}, {"",}, - {"cntrl", tok_cntrl, 0}, - {"p_sign_posn", tok_p_sign_posn, 0}, - {"",}, - {"charconv", tok_charconv, 0}, - {"n_sep_by_space", tok_n_sep_by_space, 0}, - {"print", tok_print, 0}, - {"xdigit", tok_xdigit, 0}, - {"toupper", tok_toupper, 0}, - {"negative_sign", tok_negative_sign, 0}, - {"",}, - {"LC_COLLATE", tok_lc_collate, 0}, - {"n_sign_posn", tok_n_sign_posn, 0}, - {"tolower", tok_tolower, 0}, - {"",}, {"",}, - {"int_curr_symbol", tok_int_curr_symbol, 0}, - {"noexpr", tok_noexpr, 0}, - {"",}, - {"mon", tok_mon, 0}, - {"copy", tok_copy, 0}, - {"t_fmt_ampm", tok_t_fmt_ampm, 0}, - {"LC_MONETARY", tok_lc_monetary, 0}, - {"mon_thousands_sep", tok_mon_thousands_sep, 0}, - {"era", tok_era, 0}, - {"",}, {"",}, {"",}, {"",}, - {"p_cs_precedes", tok_p_cs_precedes, 0}, - {"era_t_fmt", tok_era_t_fmt, 0}, - {"blank", tok_blank, 0}, - {"",}, - {"comment_char", tok_comment_char, 0}, - {"day", tok_day, 0}, - {"",}, - {"currency_symbol", tok_currency_symbol, 0}, - {"",}, - {"mon_decimal_point", tok_mon_decimal_point, 0}, - {"n_cs_precedes", tok_n_cs_precedes, 0}, - {"",}, {"",}, {"",}, {"",}, {"",}, - {"era_d_fmt", 
tok_era_d_fmt, 0}, - {"alt_digits", tok_alt_digits, 0}, - {"era_d_t_fmt", tok_era_d_t_fmt, 0}, - {"",}, - {"grouping", tok_grouping, 0}, - {"",}, - {"space", tok_space, 0}, - {"",}, {"",}, - {"decimal_point", tok_decimal_point, 0}, - {"charclass", tok_charclass, 0}, - {"int_frac_digits", tok_int_frac_digits, 0}, - {"order_start", tok_order_start, 0}, - {"mon_grouping", tok_mon_grouping, 0}, - {"thousands_sep", tok_thousands_sep, 0}, - {"from", tok_from, 0}, - {"nostr", tok_nostr, 0}, - {"",}, {"",}, {"",}, {"",}, - {"lower", tok_lower, 0}, - {"",}, {"",}, {"",}, - {"order_end", tok_order_end, 0}, - {"",}, - {"frac_digits", tok_frac_digits, 0}, - {"",}, {"",}, {"",}, - {"alnum", tok_alnum, 0}, - {"",}, {"",}, - {"repertoiremap", tok_repertoiremap, 0}, - {"",}, - {"upper", tok_upper, 0}, - {"escape_char", tok_escape_char, 0}, - {"",}, {"",}, {"",}, - {"abday", tok_abday, 0}, - {"yesstr", tok_yesstr, 0}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"",}, - {"yesexpr", tok_yesexpr, 0}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"graph", tok_graph, 0}, - {"",}, {"",}, {"",}, {"",}, - {"am_pm", tok_am_pm, 0}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"",}, {"",}, {"",}, {"",}, - {"era_year", tok_era_year, 0}, + {""}, {""}, {""}, + {"END", tok_end, 0}, + {""}, {""}, {""}, + {"LC_TIME", tok_lc_time, 0}, + {"era", tok_era, 0}, + {"date", tok_date, 0}, + {"LC_ADDRESS", tok_lc_address, 0}, + {""}, + {"LC_TELEPHONE", tok_lc_telephone, 0}, + {"LC_CTYPE", tok_lc_ctype, 0}, + {"era_t_fmt", tok_era_t_fmt, 0}, + {"LC_COLLATE", tok_lc_collate, 0}, + {"height", tok_height, 0}, + {"LC_IDENTIFICATION", tok_lc_identification, 0}, + {""}, + {"era_d_fmt", tok_era_d_fmt, 0}, + {"LC_NUMERIC", tok_lc_numeric, 0}, + {""}, {""}, {""}, + {"UNDEFINED", tok_undefined, 0}, + {""}, + {"reorder-end", tok_reorder_end, 0}, + {"LC_NAME", tok_lc_name, 0}, + {"reorder-after", 
tok_reorder_after, 0}, + {"LC_MEASUREMENT", tok_lc_measurement, 0}, + {""}, + {"LC_MONETARY", tok_lc_monetary, 0}, + {""}, + {"day", tok_day, 0}, + {"week", tok_week, 0}, + {"t_fmt", tok_t_fmt, 0}, + {"yesstr", tok_yesstr, 0}, + {""}, + {"LC_PAPER", tok_lc_paper, 0}, + {""}, + {"d_fmt", tok_d_fmt, 0}, + {"LC_MESSAGES", tok_lc_messages, 0}, + {""}, + {"era_year", tok_era_year, 0}, + {""}, {""}, + {"IGNORE", tok_ignore, 0}, + {""}, {""}, {""}, + {"graph", tok_graph, 0}, + {""}, {""}, + {"backward", tok_backward, 0}, + {""}, {""}, {""}, + {"address", tok_address, 0}, + {""}, {""}, {""}, {""}, + {"yesexpr", tok_yesexpr, 0}, + {"audience", tok_audience, 0}, + {""}, + {"abday", tok_abday, 0}, + {""}, {""}, {""}, {""}, {""}, + {"order_start", tok_order_start, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"order_end", tok_order_end, 0}, + {"reorder-sections-end", tok_reorder_sections_end, 0}, + {""}, + {"reorder-sections-after", tok_reorder_sections_after, 0}, + {""}, {""}, + {"print", tok_print, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"tolower", tok_tolower, 0}, + {""}, + {"translit_start", tok_translit_start, 0}, + {""}, {""}, + {"translit_end", tok_translit_end, 0}, + {""}, {""}, + {"title", tok_title, 0}, + {""}, {""}, + {"repertoiremap", tok_repertoiremap, 0}, + {""}, + {"digit", tok_digit, 0}, + {""}, {""}, + {"tel", tok_tel, 0}, + {"else", tok_else, 0}, + {"alpha", tok_alpha, 0}, + {""}, {""}, + {"timezone", tok_timezone, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"blank", tok_blank, 0}, + {"tel_dom_fmt", tok_tel_dom_fmt, 0}, + {""}, {""}, {""}, + {"space", tok_space, 0}, + {"era_d_t_fmt", tok_era_d_t_fmt, 0}, + {"duo_valid_to", tok_duo_valid_to, 0}, + {""}, {""}, {""}, + {"xdigit", tok_xdigit, 0}, + {""}, + {"fax", tok_fax, 0}, + {""}, + {"punct", tok_punct, 0}, + {""}, + {"toupper", tok_toupper, 0}, + {"symbol-equivalence", tok_symbol_equivalence, 0}, + {""}, + {"width", tok_width, 0}, + {"escape_char", tok_escape_char, 0}, + {""}, {""}, + 
{"lang_name", tok_lang_name, 0}, + {"upper", tok_upper, 0}, + {"define", tok_define, 0}, + {"d_t_fmt", tok_d_t_fmt, 0}, + {"grouping", tok_grouping, 0}, + {""}, {""}, {""}, + {"lang_ab", tok_lang_ab, 0}, + {"lang_lib", tok_lang_lib, 0}, + {"territory", tok_territory, 0}, + {""}, {""}, + {"abbreviation", tok_abbreviation, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"alt_digits", tok_alt_digits, 0}, + {""}, + {"forward", tok_forward, 0}, + {"language", tok_language, 0}, + {""}, + {"lower", tok_lower, 0}, + {""}, {""}, + {"name_fmt", tok_name_fmt, 0}, + {""}, {""}, {""}, + {"name_mr", tok_name_mr, 0}, + {""}, {""}, {""}, {""}, + {"contact", tok_contact, 0}, + {"thousands_sep", tok_thousands_sep, 0}, + {""}, {""}, + {"country_ab3", tok_country_ab3, 0}, + {""}, + {"category", tok_category, 0}, + {""}, {""}, + {"country_ab2", tok_country_ab2, 0}, + {""}, + {"revision", tok_revision, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"nostr", tok_nostr, 0}, + {""}, {""}, {""}, + {"copy", tok_copy, 0}, + {""}, {""}, {""}, + {"outdigit", tok_outdigit, 0}, + {""}, {""}, + {"tel_int_fmt", tok_tel_int_fmt, 0}, + {""}, {""}, + {"elif", tok_elif, 0}, + {""}, {""}, + {"name_ms", tok_name_ms, 0}, + {"name_mrs", tok_name_mrs, 0}, + {""}, {""}, + {"measurement", tok_measurement, 0}, + {"collating-element", tok_collating_element, 0}, + {""}, + {"p_sep_by_space", tok_p_sep_by_space, 0}, + {""}, + {"source", tok_source, 0}, + {"duo_p_cs_precedes", tok_duo_p_cs_precedes, 0}, + {"duo_p_sep_by_space", tok_duo_p_sep_by_space, 0}, + {""}, {""}, {""}, {""}, + {"map", tok_map, 0}, + {"duo_valid_from", tok_duo_valid_from, 0}, + {""}, {""}, {""}, + {"first_weekday", tok_first_weekday, 0}, + {""}, + {"conversion_rate", tok_conversion_rate, 0}, + {""}, {""}, + {"first_workday", tok_first_workday, 0}, + {""}, {""}, {""}, {""}, + {"decimal_point", tok_decimal_point, 0}, + {""}, {""}, {""}, + {"duo_int_p_sep_by_space", tok_duo_int_p_sep_by_space, 0}, + {""}, {""}, + {"duo_frac_digits", 
tok_duo_frac_digits, 0}, + {""}, + {"uno_valid_to", tok_uno_valid_to, 0}, + {""}, {""}, + {"default_missing", tok_default_missing, 0}, + {""}, + {"country_post", tok_country_post, 0}, + {"charconv", tok_charconv, 0}, + {"name_miss", tok_name_miss, 0}, + {""}, {""}, {""}, + {"position", tok_position, 0}, + {"from", tok_from, 0}, + {"t_fmt_ampm", tok_t_fmt_ampm, 0}, + {"noexpr", tok_noexpr, 0}, + {""}, {""}, {""}, + {"coll_weight_max", tok_coll_weight_max, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"n_sep_by_space", tok_n_sep_by_space, 0}, + {"abmon", tok_abmon, 0}, + {""}, + {"duo_n_cs_precedes", tok_duo_n_cs_precedes, 0}, + {"duo_n_sep_by_space", tok_duo_n_sep_by_space, 0}, + {""}, + {"postal_fmt", tok_postal_fmt, 0}, + {"frac_digits", tok_frac_digits, 0}, + {"include", tok_include, 0}, + {""}, {""}, {""}, + {"duo_int_p_cs_precedes", tok_duo_int_p_cs_precedes, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"positive_sign", tok_positive_sign, 0}, + {"section-symbol", tok_section_symbol, 0}, + {""}, {""}, {""}, + {"name_gen", tok_name_gen, 0}, + {"duo_currency_symbol", tok_duo_currency_symbol, 0}, + {""}, {""}, + {"duo_int_n_sep_by_space", tok_duo_int_n_sep_by_space, 0}, + {"negative_sign", tok_negative_sign, 0}, + {""}, + {"duo_p_sign_posn", tok_duo_p_sign_posn, 0}, + {"country_car", tok_country_car, 0}, + {"comment_char", tok_comment_char, 0}, + {"p_cs_precedes", tok_p_cs_precedes, 0}, + {""}, {""}, {""}, + {"country_name", tok_country_name, 0}, + {""}, + {"duo_int_frac_digits", tok_duo_int_frac_digits, 0}, + {"class", tok_class, 0}, + {"collating-symbol", tok_collating_symbol, 0}, + {""}, {""}, {""}, + {"currency_symbol", tok_currency_symbol, 0}, + {"p_sign_posn", tok_p_sign_posn, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"mon_thousands_sep", tok_mon_thousands_sep, 0}, + {"mon", tok_mon, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"endif", tok_endif, 0}, + {""}, + {"mon_grouping", tok_mon_grouping, 0}, + {""}, + 
{"charclass", tok_charclass, 0}, + {""}, + {"duo_int_n_cs_precedes", tok_duo_int_n_cs_precedes, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"uno_valid_from", tok_uno_valid_from, 0}, + {""}, {""}, {""}, {""}, {""}, + {"email", tok_email, 0}, + {""}, {""}, {""}, {""}, + {"duo_n_sign_posn", tok_duo_n_sign_posn, 0}, + {""}, {""}, + {"n_cs_precedes", tok_n_cs_precedes, 0}, + {""}, {""}, {""}, + {"mon_decimal_point", tok_mon_decimal_point, 0}, + {""}, + {"duo_int_p_sign_posn", tok_duo_int_p_sign_posn, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"n_sign_posn", tok_n_sign_posn, 0}, + {""}, {""}, {""}, {""}, {""}, + {"int_p_cs_precedes", tok_int_p_cs_precedes, 0}, + {"int_p_sep_by_space", tok_int_p_sep_by_space, 0}, + {""}, {""}, {""}, {""}, + {"cal_direction", tok_cal_direction, 0}, + {"duo_int_curr_symbol", tok_duo_int_curr_symbol, 0}, + {"undef", tok_undef, 0}, + {""}, {""}, {""}, {""}, + {"int_select", tok_int_select, 0}, + {"application", tok_application, 0}, + {""}, {""}, {""}, + {"ifdef", tok_ifdef, 0}, + {""}, + {"country_isbn", tok_country_isbn, 0}, + {""}, {""}, + {"alnum", tok_alnum, 0}, + {""}, {""}, {""}, {""}, + {"int_frac_digits", tok_int_frac_digits, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"int_prefix", tok_int_prefix, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"duo_int_n_sign_posn", tok_duo_int_n_sign_posn, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"lang_term", tok_lang_term, 0}, + {""}, {""}, + {"int_n_cs_precedes", tok_int_n_cs_precedes, 0}, + {"int_n_sep_by_space", tok_int_n_sep_by_space, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, + {"am_pm", tok_am_pm, 0}, + {""}, {""}, {""}, {""}, + {"cntrl", tok_cntrl, 0}, + {"country_num", tok_country_num, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"int_p_sign_posn", tok_int_p_sign_posn, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + 
{""}, + {"int_curr_symbol", tok_int_curr_symbol, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"int_n_sign_posn", tok_int_n_sign_posn, 0} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) @@ -202,7 +396,7 @@ locfile_hash (register const char *str, register int len) { register const char *s = wordlist[key].name; - if (*s == *str && !strncmp (str + 1, s + 1, len - 1)) + if (*str == *s && !strncmp (str + 1, s + 1, len - 1)) return &wordlist[key]; } } diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h index 7845b4b..97945f8 100644 --- a/locale/programs/locfile-token.h +++ b/locale/programs/locfile-token.h @@ -1,6 +1,6 @@ -/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -28,17 +28,19 @@ enum token_t tok_eol, tok_bsymbol, tok_ident, - tok_ellipsis, + tok_ellipsis2, + tok_ellipsis3, + tok_ellipsis4, tok_semicolon, tok_comma, tok_open_brace, tok_close_brace, tok_charcode, - tok_ucs2, tok_ucs4, tok_number, tok_minus1, tok_string, + tok_include, tok_escape_char, tok_comment_char, @@ -48,6 +50,8 @@ enum token_t tok_g1esc, tok_g2esc, tok_g3esc, + tok_escseq, + tok_addset, tok_charids, @@ -62,6 +66,7 @@ enum token_t tok_lc_ctype, tok_copy, + /* Keep the following entries up to the next comment in this order! */ tok_upper, tok_lower, tok_alpha, @@ -74,12 +79,22 @@ enum token_t tok_cntrl, tok_punct, tok_alnum, + /* OK, shuffling allowed again. 
*/ + tok_outdigit, tok_charclass, + tok_class, tok_toupper, tok_tolower, + tok_map, + tok_translit_start, + tok_translit_end, + tok_default_missing, tok_lc_collate, + tok_coll_weight_max, + tok_section_symbol, tok_collating_element, tok_collating_symbol, + tok_symbol_equivalence, tok_order_start, tok_order_end, tok_from, @@ -88,6 +103,17 @@ enum token_t tok_position, tok_undefined, tok_ignore, + tok_reorder_after, + tok_reorder_end, + tok_reorder_sections_after, + tok_reorder_sections_end, + tok_define, + tok_undef, + tok_ifdef, + tok_ifndef, + tok_else, + tok_elif, + tok_endif, tok_lc_monetary, tok_int_curr_symbol, tok_currency_symbol, @@ -104,6 +130,33 @@ enum token_t tok_n_sep_by_space, tok_p_sign_posn, tok_n_sign_posn, + tok_int_p_cs_precedes, + tok_int_p_sep_by_space, + tok_int_n_cs_precedes, + tok_int_n_sep_by_space, + tok_int_p_sign_posn, + tok_int_n_sign_posn, + tok_duo_int_curr_symbol, + tok_duo_currency_symbol, + tok_duo_int_frac_digits, + tok_duo_frac_digits, + tok_duo_p_cs_precedes, + tok_duo_p_sep_by_space, + tok_duo_n_cs_precedes, + tok_duo_n_sep_by_space, + tok_duo_int_p_cs_precedes, + tok_duo_int_p_sep_by_space, + tok_duo_int_n_cs_precedes, + tok_duo_int_n_sep_by_space, + tok_duo_p_sign_posn, + tok_duo_n_sign_posn, + tok_duo_int_p_sign_posn, + tok_duo_int_n_sign_posn, + tok_uno_valid_from, + tok_uno_valid_to, + tok_duo_valid_from, + tok_duo_valid_to, + tok_conversion_rate, tok_lc_numeric, tok_decimal_point, tok_thousands_sep, @@ -124,11 +177,61 @@ enum token_t tok_era_d_t_fmt, tok_era_t_fmt, tok_alt_digits, + tok_week, + tok_first_weekday, + tok_first_workday, + tok_cal_direction, + tok_timezone, tok_lc_messages, tok_yesexpr, tok_noexpr, tok_yesstr, tok_nostr, + tok_lc_paper, + tok_height, + tok_lc_name, + tok_name_fmt, + tok_name_gen, + tok_name_mr, + tok_name_mrs, + tok_name_miss, + tok_name_ms, + tok_lc_address, + tok_postal_fmt, + tok_country_name, + tok_country_post, + tok_country_ab2, + tok_country_ab3, + tok_country_num, + tok_country_car, + 
tok_country_isbn, + tok_lang_name, + tok_lang_ab, + tok_lang_term, + tok_lang_lib, + tok_lc_telephone, + tok_tel_int_fmt, + tok_tel_dom_fmt, + tok_int_select, + tok_int_prefix, + tok_lc_measurement, + tok_measurement, + tok_lc_identification, + tok_title, + tok_source, + tok_address, + tok_contact, + tok_email, + tok_tel, + tok_fax, + tok_language, + tok_territory, + tok_audience, + tok_application, + tok_abbreviation, + tok_revision, + tok_date, + tok_category, tok_error }; diff --git a/locale/programs/locfile.c b/locale/programs/locfile.c index 79d6ab1..fd858e2 100644 --- a/locale/programs/locfile.c +++ b/locale/programs/locfile.c @@ -23,63 +23,36 @@ #include <errno.h> #include <fcntl.h> -#include <locale.h> -#include <malloc.h> -#include <stdio.h> #include <stdlib.h> -#include <string.h> #include <unistd.h> -#include <libintl.h> +#include <sys/param.h> #include <sys/stat.h> -#include <sys/uio.h> +#include "localedef.h" #include "locfile.h" -#include "linereader.h" -#include "localeinfo.h" -#include "locales.h" - -/* Uncomment the following line in the production version. */ -/* #define NDEBUG 1 */ -#include <assert.h> - -/* Define the lookup function. */ #include "locfile-kw.h" -/* Some useful macros. */ -#define MIN(a, b) (__extension__ ({ typeof (a) _a = (a); \ - typeof (b) _b = (b); \ - _a < _b ? 
_a : _b; })) - - -void *xmalloc (size_t __n); -char *xstrdup (const char *__str); - -struct localedef_t * -locfile_read (const char *filename, struct charset_t *charset) +int +locfile_read (struct localedef_t *result, struct charmap_t *charmap) { - struct repertoire_t *repertoire = NULL; + const char *filename = result->name; + const char *repertoire_name = result->repertoire_name; + int locale_mask = result->needed ^ result->avail; struct linereader *ldfile; - struct localedef_t *result; - int state; - enum token_t expected_tok = tok_none; - const char *expected_str = NULL; - enum token_t ctype_tok_sym = tok_none; - const char *ctype_tok_str = NULL; - int copy_category = 0; - int cnt; - /* Allocate space for result. */ - result = (struct localedef_t *) xmalloc (sizeof (struct localedef_t)); - memset (result, '\0', sizeof (struct localedef_t)); + /* If no repertoire name was specified use the global one. */ + if (repertoire_name == NULL) + repertoire_name = repertoire_global; + /* Open the locale definition file. 
*/ ldfile = lr_open (filename, locfile_hash); if (ldfile == NULL) { if (filename[0] != '/') { - char *i18npath = __secure_getenv ("I18NPATH"); + char *i18npath = getenv ("I18NPATH"); if (i18npath != NULL && *i18npath != '\0') { char path[strlen (filename) + 1 + strlen (i18npath) @@ -94,6 +67,13 @@ locfile_read (const char *filename, struct charset_t *charset) stpcpy (stpcpy (stpcpy (path, next), "/locales/"), filename); ldfile = lr_open (path, locfile_hash); + + if (ldfile == NULL) + { + stpcpy (stpcpy (path, next), filename); + + ldfile = lr_open (path, locfile_hash); + } } } @@ -108,965 +88,219 @@ locfile_read (const char *filename, struct charset_t *charset) } if (ldfile == NULL) - { - result->failed = 1; - return result; - } + return 1; } -#define HANDLE_COPY(category, token, string) \ - if (nowtok == tok_copy) \ - { \ - copy_posix.mask &= ~(1 << category); \ - copy_category = category; \ - expected_tok = token; \ - expected_str = string; \ - state = 8; \ - continue; \ - } \ - ++state - -#define LOCALE_PROLOG(token, string) \ - if (nowtok == tok_eol) \ - /* Ignore empty lines. 
*/ \ - continue; \ - if (nowtok == tok_end) \ - { \ - expected_tok = token; \ - expected_str = string; \ - state = 4; \ - continue; \ - } \ - if (nowtok == tok_copy) \ - goto only_copy; - - -#define READ_STRING(fn, errlabel) \ - do \ - { \ - arg = lr_token (ldfile, charset); \ - if (arg->tok != tok_string) \ - goto errlabel; \ - fn (ldfile, result, nowtok, arg, charset); \ - lr_ignore_rest (ldfile, 1); \ - } \ - while (0) - -#define READ_STRING_LIST(fn, errlabel) \ - do \ - { \ - arg = lr_token (ldfile, charset); \ - while (arg->tok == tok_string) \ - { \ - fn (ldfile, result, nowtok, arg, charset); \ - arg = lr_token (ldfile, charset); \ - if (arg->tok != tok_semicolon) \ - break; \ - arg = lr_token (ldfile, charset); \ - } \ - if (arg->tok != tok_eol) \ - goto errlabel; \ - } \ - while (0) - -#define READ_NUMBER(fn, errlabel) \ - do \ - { \ - arg = lr_token (ldfile, charset); \ - if (arg->tok != tok_minus1 && arg->tok != tok_number) \ - goto errlabel; \ - fn (ldfile, result, nowtok, arg, charset); \ - lr_ignore_rest (ldfile, 1); \ - } \ - while (0) - -#define READ_NUMBER_LIST(fn, errlabel) \ - do \ - { \ - arg = lr_token (ldfile, charset); \ - while (arg->tok == tok_minus1 || arg->tok == tok_number) \ - { \ - fn (ldfile, result, nowtok, arg, charset); \ - arg = lr_token (ldfile, charset); \ - if (arg->tok != tok_semicolon) \ - break; \ - arg = lr_token (ldfile, charset); \ - } \ - if (arg->tok != tok_eol) \ - goto errlabel; \ - } \ - while (0) - -#define SYNTAX_ERROR(string) \ - lr_error (ldfile, string); \ - lr_ignore_rest (ldfile, 0); - - - /* Parse locale definition file and store result in RESULT. */ - state = 1; + /* Parse locale definition file and store result in RESULT. */ while (1) { - /* What's on? */ - struct token *now = lr_token (ldfile, charset); + struct token *now = lr_token (ldfile, charmap, NULL); enum token_t nowtok = now->tok; struct token *arg; if (nowtok == tok_eof) break; - switch (state) - { - case 1: - /* The beginning. 
We expect the special declarations, EOL or - the start of any locale. */ - if (nowtok == tok_eol) - /* Ignore empty lines. */ - continue; - - switch (nowtok) - { - case tok_escape_char: - case tok_comment_char: - /* We need an argument. */ - arg = lr_token (ldfile, charset); - - if (arg->tok != tok_ident) - { - SYNTAX_ERROR (_("bad argument")); - continue; - } - - if (arg->val.str.len != 1) - { - lr_error (ldfile, _("\ -argument to `%s' must be a single character"), - nowtok == tok_escape_char ? "escape_char" - : "comment_char"); - - lr_ignore_rest (ldfile, 0); - continue; - } - - if (nowtok == tok_escape_char) - ldfile->escape_char = *arg->val.str.start; - else - ldfile->comment_char = *arg->val.str.start; - break; - - case tok_repertoiremap: - /* We need an argument. */ - arg = lr_token (ldfile, charset); - - if (arg->tok != tok_ident) - { - SYNTAX_ERROR (_("bad argument")); - continue; - } - - if (repertoiremap == NULL) - { - repertoiremap = memcpy (xmalloc (arg->val.str.len + 1), - arg->val.str.start, - arg->val.str.len); - ((char *) repertoiremap)[arg->val.str.len] = '\0'; - } - - lr_ignore_rest (ldfile, 1); - continue; - - case tok_lc_ctype: - if (repertoire == NULL) - { - /* Read the repertoire map now. */ - if (repertoiremap == NULL) - /* This is fatal. */ - error (4, 0, - _("no repertoire map specified: cannot proceed")); - - repertoire = repertoire_read (repertoiremap); - if (repertoire == NULL) - /* This is also fatal. */ - error (4, errno, _("cannot read repertoire map `%s'"), - repertoiremap); - } - state = 2; - break; - - case tok_lc_collate: - if (repertoire == NULL) - { - /* Read the repertoire map now. */ - if (repertoiremap == NULL) - /* This is fatal. */ - error (4, 0, - _("no repertoire map specified: cannot proceed")); - - repertoire = repertoire_read (repertoiremap); - if (repertoire == NULL) - /* This is also fatal. 
*/ - error (4, errno, _("cannot read repertoire map `%s'"), - repertoiremap); - } - state = 10; - break; - - case tok_lc_monetary: - if (repertoire == NULL) - { - /* Read the repertoire map now. */ - if (repertoiremap == NULL) - /* This is fatal. */ - error (4, 0, - _("no repertoire map specified: cannot proceed")); - - repertoire = repertoire_read (repertoiremap); - if (repertoire == NULL) - /* This is also fatal. */ - error (4, errno, _("cannot read repertoire map `%s'"), - repertoiremap); - } - state = 20; - break; + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; - case tok_lc_numeric: - if (repertoire == NULL) - { - /* Read the repertoire map now. */ - if (repertoiremap == NULL) - /* This is fatal. */ - error (4, 0, - _("no repertoire map specified: cannot proceed")); - - repertoire = repertoire_read (repertoiremap); - if (repertoire == NULL) - /* This is also fatal. */ - error (4, errno, _("cannot read repertoire map `%s'"), - repertoiremap); - } - state = 30; - break; - - case tok_lc_time: - if (repertoire == NULL) - { - /* Read the repertoire map now. */ - if (repertoiremap == NULL) - /* This is fatal. */ - error (4, 0, - _("no repertoire map specified: cannot proceed")); - - repertoire = repertoire_read (repertoiremap); - if (repertoire == NULL) - /* This is also fatal. */ - error (4, errno, _("cannot read repertoire map `%s'"), - repertoiremap); - } - state = 40; - break; - - case tok_lc_messages: - if (repertoire == NULL) - { - /* Read the repertoire map now. */ - if (repertoiremap == NULL) - /* This is fatal. */ - error (4, 0, - _("no repertoire map specified: cannot proceed")); - - repertoire = repertoire_read (repertoiremap); - if (repertoire == NULL) - /* This is also fatal. 
*/ - error (4, errno, _("cannot read repertoire map `%s'"), - repertoiremap); - } - state = 50; - break; - - default: - SYNTAX_ERROR (_("\ -syntax error: not inside a locale definition section")); - continue; - } - lr_ignore_rest (ldfile, 1); - continue; - - case 2: - HANDLE_COPY (LC_CTYPE, tok_lc_ctype, "LC_CTYPE"); - - ctype_startup (ldfile, result, charset); - /* FALLTHROUGH */ - - case 3: - /* Here we accept all the character classes, tolower/toupper, - and following ANSI C:1995 self-defined classes. */ - LOCALE_PROLOG (tok_lc_ctype, "LC_CTYPE"); + switch (nowtok) + { + case tok_escape_char: + case tok_comment_char: + /* We need an argument. */ + arg = lr_token (ldfile, charmap, NULL); - if (nowtok == tok_charclass) + if (arg->tok != tok_ident) { - READ_STRING_LIST (ctype_class_new, bad_new_charclass); - continue; - bad_new_charclass: - SYNTAX_ERROR (_("\ -syntax error in definition of new character class")); + SYNTAX_ERROR (_("bad argument")); continue; } - if (nowtok == tok_charconv) + if (arg->val.str.lenmb != 1) { - READ_STRING_LIST (ctype_map_new, bad_new_charconv); - continue; - bad_new_charconv: - SYNTAX_ERROR (_("\ -syntax error in definition of new character map")); - continue; - } + lr_error (ldfile, _("\ +argument to `%s' must be a single character"), + nowtok == tok_escape_char + ? 
"escape_char" : "comment_char"); - if (nowtok == tok_upper || nowtok == tok_lower - || nowtok == tok_alpha || nowtok == tok_digit - || nowtok == tok_alnum || nowtok == tok_space - || nowtok == tok_cntrl || nowtok == tok_punct - || nowtok == tok_graph || nowtok == tok_print - || nowtok == tok_xdigit || nowtok == tok_blank) - { - ctype_tok_sym = nowtok; - ctype_tok_str = NULL; - state = 5; + lr_ignore_rest (ldfile, 0); continue; } - if (nowtok == tok_toupper|| nowtok == tok_tolower) - { - ctype_tok_sym = nowtok; - ctype_tok_str = NULL; - state = 6; - continue; - } + if (nowtok == tok_escape_char) + ldfile->escape_char = *arg->val.str.startmb; + else + ldfile->comment_char = *arg->val.str.startmb; + break; - if (nowtok != tok_ident) - goto bad_charclass; + case tok_repertoiremap: + /* We need an argument. */ + arg = lr_token (ldfile, charmap, NULL); - /* We possibly have a self-defined character class. */ - if (ctype_is_charclass (ldfile, result, now->val.str.start)) + if (arg->tok != tok_ident) { - ctype_tok_sym = nowtok; - ctype_tok_str = now->val.str.start; - state = 5; + SYNTAX_ERROR (_("bad argument")); continue; } - /* ...or a self-defined character map. */ - if (ctype_is_charconv (ldfile, result, now->val.str.start)) + if (repertoire_name == NULL) { - ctype_tok_sym = nowtok; - ctype_tok_str = now->val.str.start; - state = 6; - continue; + repertoire_name = memcpy (xmalloc (arg->val.str.lenmb + 1), + arg->val.str.startmb, + arg->val.str.lenmb); + ((char *) repertoire_name)[arg->val.str.lenmb] = '\0'; } + break; - SYNTAX_ERROR (_("syntax error in definition of LC_CTYPE category")); + case tok_lc_ctype: + ctype_read (ldfile, result, charmap, repertoire_name, + (locale_mask & CTYPE_LOCALE) == 0); + result->avail |= locale_mask & CTYPE_LOCALE; continue; - case 4: - /* Handle `END xxx'. 
*/ - if (nowtok != expected_tok) - lr_error (ldfile, _("\ -`%1$s' definition does not end with `END %1$s'"), expected_str); - - lr_ignore_rest (ldfile, nowtok == expected_tok); - state = 1; + case tok_lc_collate: + collate_read (ldfile, result, charmap, repertoire_name, + (locale_mask & COLLATE_LOCALE) == 0); + result->avail |= locale_mask & COLLATE_LOCALE; continue; - case 5: - /* Here we expect a semicolon separated list of bsymbols. The - bit to be set in the word is given in CHARCLASS_BIT. */ - arg = now; - - ctype_class_start (ldfile, result, ctype_tok_sym, ctype_tok_str, - charset); - - while (arg->tok != tok_eol) - { - /* Any token other than a bsymbol is an error. */ - if (arg->tok != tok_bsymbol) - { - bad_charclass: - SYNTAX_ERROR (_("\ -syntax error in character class definition")); - break; - } - - /* Lookup value for token and write into array. */ - ctype_class_from (ldfile, result, arg, charset); - - arg = lr_token (ldfile, charset); - if (arg->tok == tok_semicolon) - arg = lr_token (ldfile, charset); - else if (arg->tok != tok_eol) - goto bad_charclass; - - /* Look for ellipsis. */ - if (arg->tok == tok_ellipsis) - { - arg = lr_token (ldfile, charset); - if (arg->tok != tok_semicolon) - goto bad_charclass; - - arg = lr_token (ldfile, charset); - if (arg->tok != tok_bsymbol) - goto bad_charclass; - - /* Write range starting at LAST to ARG->VAL. */ - ctype_class_to (ldfile, result, arg, charset); - - arg = lr_token (ldfile, charset); - if (arg->tok == tok_semicolon) - arg = lr_token (ldfile, charset); - else if (arg->tok != tok_eol) - goto bad_charclass; - } - } - - /* Mark class as already seen. */ - ctype_class_end (ldfile, result); - state = 3; - + case tok_lc_monetary: + monetary_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MONETARY_LOCALE) == 0); + result->avail |= locale_mask & MONETARY_LOCALE; continue; - case 6: - /* Here we expect a list of character mappings. Note: the - first opening brace is already matched. 
*/ - ctype_map_start (ldfile, result, ctype_tok_sym, ctype_tok_str, - charset); - - while (1) - { - /* Match ( bsymbol , bsymbol ) */ - if (now->tok != tok_open_brace) - goto bad_charconv; - - now = lr_token (ldfile, charset); - if (now->tok != tok_bsymbol) - { - bad_charconv: - SYNTAX_ERROR (_("\ -syntax error in character conversion definition")); - state = 3; - break; - } - - /* Lookup arg and assign to FROM. */ - ctype_map_from (ldfile, result, now, charset); - - now = lr_token (ldfile, charset); - if (now->tok != tok_comma) - goto bad_charconv; - - now = lr_token (ldfile, charset); - if (now->tok != tok_bsymbol) - goto bad_charconv; - - /* Lookup arg and assign to TO. */ - ctype_map_to (ldfile, result, now, charset); - - now = lr_token (ldfile, charset); - if (now->tok != tok_close_brace) - goto bad_charconv; - - now = lr_token (ldfile, charset); - if (now->tok == tok_eol) - { - state = 3; - break; - } - if (now->tok != tok_semicolon) - goto bad_charconv; - - now = lr_token (ldfile, charset); - } - - ctype_map_end (ldfile, result); + case tok_lc_numeric: + numeric_read (ldfile, result, charmap, repertoire_name, + (locale_mask & NUMERIC_LOCALE) == 0); + result->avail |= locale_mask & NUMERIC_LOCALE; continue; - case 8: - { - /* We have seen `copy'. First match the argument. */ - int warned = 0; - - if (nowtok != tok_string) - lr_error (ldfile, _("expect string argument for `copy'")); - else - def_to_process (now->val.str.start, 1 << copy_category); - - lr_ignore_rest (ldfile, nowtok == tok_string); - - /* The rest of the line must be empty - and the next keyword must be `END xxx'. 
*/ - - while (lr_token (ldfile, charset)->tok != tok_end) - { - if (warned == 0) - { - only_copy: - lr_error (ldfile, _("\ -no other keyword shall be specified when `copy' is used")); - warned = 1; - } - - lr_ignore_rest (ldfile, 0); - } - - state = 4; - } + case tok_lc_time: + time_read (ldfile, result, charmap, repertoire_name, + (locale_mask & TIME_LOCALE) == 0); + result->avail |= locale_mask & TIME_LOCALE; continue; - case 10: - HANDLE_COPY (LC_COLLATE, tok_lc_collate, "LC_COLLATE"); - - collate_startup (ldfile, result, charset); - /* FALLTHROUGH */ - - case 11: - /* Process the LC_COLLATE section. We expect `END LC_COLLATE' - any of the collation specifications, or any bsymbol. */ - LOCALE_PROLOG (tok_lc_collate, "LC_COLLATE"); - - if (nowtok == tok_order_start) - { - state = 12; - continue; - } - - if (nowtok != tok_collating_element - && nowtok != tok_collating_symbol) - { - bad_collation: - lr_error (ldfile, _("\ -syntax error in collation definition")); - lr_ignore_rest (ldfile, 0); - continue; - } - - /* Get argument. */ - arg = lr_token (ldfile, charset); - if (arg->tok != tok_bsymbol) - { - lr_error (ldfile, _("\ -collation symbol expected after `%s'"), - nowtok == tok_collating_element - ? "collating-element" : "collating-symbol"); - lr_ignore_rest (ldfile, 0); - continue; - } - - if (nowtok == tok_collating_element) - { - /* Save to-value as new name. */ - collate_element_to (ldfile, result, arg, charset); - - arg = lr_token (ldfile, charset); - if (arg->tok != tok_from) - { - lr_error (ldfile, _("\ -`from' expected after first argument to `collating-element'")); - lr_ignore_rest (ldfile, 0); - continue; - } - - arg = lr_token (ldfile, charset); - if (arg->tok != tok_string) - { - lr_error (ldfile, _("\ -from-value of `collating-element' must be a string")); - lr_ignore_rest (ldfile, 0); - continue; - } - - /* Enter new collating element. */ - collate_element_from (ldfile, result, arg, charset); - } - else - /* Enter new collating symbol into table. 
*/ - collate_symbol (ldfile, result, arg, charset); - - lr_ignore_rest (ldfile, 1); - continue; - - case 12: - /* We parse the rest of the line containing `order_start'. - In any case we continue with parsing the symbols. */ - state = 13; - - cnt = 0; - while (now->tok != tok_eol) - { - int collation_method = 0; - - ++cnt; - - do - { - if (now->tok == tok_forward) - collation_method |= sort_forward; - else if (now->tok == tok_backward) - collation_method |= sort_backward; - else if (now->tok == tok_position) - collation_method |= sort_position; - else - { - lr_error (ldfile, _("unknown collation directive")); - lr_ignore_rest (ldfile, 0); - continue; - } - - now = lr_token (ldfile, charset); - } - while (now->tok == tok_comma - && ((now = lr_token (ldfile, charset)) != tok_none)); - - /* Check for consistency: forward and backwards are - mutually exclusive. */ - if ((collation_method & sort_forward) != 0 - && (collation_method & sort_backward) != 0) - { - lr_error (ldfile, _("\ -sorting order `forward' and `backward' are mutually exclusive")); - /* The recover clear the backward flag. */ - collation_method &= ~sort_backward; - } - - /* ??? I don't know whether this is correct but while - thinking about the `strcoll' functions I found that I - need a direction when performing position depended - collation. So I assume here that implicitly the - direction `forward' is given when `position' alone is - written. --drepper */ - if (collation_method == sort_position) - collation_method |= sort_forward; - - /* Enter info about next collation order. */ - collate_new_order (ldfile, result, collation_method); - - if (now->tok != tok_eol && now->tok != tok_semicolon) - { - lr_error (ldfile, _("\ -syntax error in `order_start' directive")); - lr_ignore_rest (ldfile, 0); - break; - } - - if (now->tok == tok_semicolon) - now = lr_token (ldfile, charset); - } - - /* If no argument to `order_start' is given, one `forward' - argument is implicitly assumed. 
*/ - if (cnt == 0) - collate_new_order (ldfile, result, sort_forward); - - - /* We now know about all sorting rules. */ - collate_build_arrays (ldfile, result); - + case tok_lc_messages: + messages_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MESSAGES_LOCALE) == 0); + result->avail |= locale_mask & MESSAGES_LOCALE; continue; - case 13: - /* We read one symbol a line until `order_end' is found. */ - { - static int last_correct = 1; - - if (nowtok == tok_order_end) - { - state = 14; - lr_ignore_rest (ldfile, 1); - continue; - } - - /* Ignore empty lines. */ - if (nowtok == tok_eol) - continue; - - if (nowtok != tok_bsymbol && nowtok != tok_undefined - && nowtok != tok_ellipsis) - { - if (last_correct == 1) - { - lr_error (ldfile, _("\ -syntax error in collating order definition")); - last_correct = 0; - } - lr_ignore_rest (ldfile, 0); - continue; - } - else - { - last_correct = 1; - - /* Remember current token. */ - if (collate_order_elem (ldfile, result, now, charset) < 0) - continue; - } - - /* Read optional arguments. */ - arg = lr_token (ldfile, charset); - while (arg->tok != tok_eol) - { - if (arg->tok != tok_ignore && arg->tok != tok_ellipsis - && arg->tok != tok_bsymbol && arg->tok != tok_string) - break; - - if (arg->tok == tok_ignore || arg->tok == tok_ellipsis - || arg->tok == tok_string) - { - /* Call handler for simple weights. */ - if (collate_simple_weight (ldfile, result, arg, charset) - < 0) - goto illegal_weight; - - arg = lr_token (ldfile, charset); - } - else - do - { - /* Collect char. */ - int ok = collate_weight_bsymbol (ldfile, result, arg, - charset); - if (ok < 0) - goto illegal_weight; - - arg = lr_token (ldfile, charset); - } - while (arg->tok == tok_bsymbol); - - /* Are there more weights? */ - if (arg->tok != tok_semicolon) - break; - - /* Yes, prepare next weight. 
*/ - if (collate_next_weight (ldfile, result) < 0) - goto illegal_weight; - - arg = lr_token (ldfile, charset); - } - - if (arg->tok != tok_eol) - { - SYNTAX_ERROR (_("syntax error in order specification")); - } - - collate_end_weight (ldfile, result); - illegal_weight: - } + case tok_lc_paper: + paper_read (ldfile, result, charmap, repertoire_name, + (locale_mask & PAPER_LOCALE) == 0); + result->avail |= locale_mask & PAPER_LOCALE; continue; - case 14: - /* Following to the `order_end' keyword we don't expect - anything but the `END'. */ - if (nowtok == tok_eol) - continue; - - if (nowtok != tok_end) - goto bad_collation; - - expected_tok = tok_lc_collate; - expected_str = "LC_COLLATE"; - state = 4; - - ldfile->translate_strings = 1; + case tok_lc_name: + name_read (ldfile, result, charmap, repertoire_name, + (locale_mask & NAME_LOCALE) == 0); + result->avail |= locale_mask & NAME_LOCALE; continue; - case 20: - HANDLE_COPY (LC_MONETARY, tok_lc_monetary, "LC_MONETARY"); - - monetary_startup (ldfile, result, charset); - /* FALLTHROUGH */ - - case 21: - LOCALE_PROLOG (tok_lc_monetary, "LC_MONETARY"); - - switch (nowtok) - { - case tok_int_curr_symbol: - case tok_currency_symbol: - case tok_mon_decimal_point: - case tok_mon_thousands_sep: - case tok_positive_sign: - case tok_negative_sign: - READ_STRING (monetary_add, bad_monetary); - break; - - case tok_int_frac_digits: - case tok_frac_digits: - case tok_p_cs_precedes: - case tok_p_sep_by_space: - case tok_n_cs_precedes: - case tok_n_sep_by_space: - case tok_p_sign_posn: - case tok_n_sign_posn: - READ_NUMBER (monetary_add, bad_monetary); - break; - - case tok_mon_grouping: - /* We have a semicolon separated list of integers. 
*/ - READ_NUMBER_LIST (monetary_add, bad_monetary); - break; - - default: - bad_monetary: - SYNTAX_ERROR (_("syntax error in monetary locale definition")); - } + case tok_lc_address: + address_read (ldfile, result, charmap, repertoire_name, + (locale_mask & ADDRESS_LOCALE) == 0); + result->avail |= locale_mask & ADDRESS_LOCALE; continue; - case 30: - HANDLE_COPY (LC_NUMERIC, tok_lc_numeric, "LC_NUMERIC"); - - numeric_startup (ldfile, result, charset); - /* FALLTHROUGH */ - - case 31: - LOCALE_PROLOG (tok_lc_numeric, "LC_NUMERIC"); - - switch (nowtok) - { - case tok_decimal_point: - case tok_thousands_sep: - READ_STRING (numeric_add, bad_numeric); - break; - - case tok_grouping: - /* We have a semicolon separated list of integers. */ - READ_NUMBER_LIST (numeric_add, bad_numeric); - break; - - default: - bad_numeric: - SYNTAX_ERROR (_("syntax error in numeric locale definition")); - } + case tok_lc_telephone: + telephone_read (ldfile, result, charmap, repertoire_name, + (locale_mask & TELEPHONE_LOCALE) == 0); + result->avail |= locale_mask & TELEPHONE_LOCALE; continue; - case 40: - HANDLE_COPY (LC_TIME, tok_lc_time, "LC_TIME"); - - time_startup (ldfile, result, charset); - /* FALLTHROUGH */ - - case 41: - LOCALE_PROLOG (tok_lc_time, "LC_TIME"); - - switch (nowtok) - { - case tok_abday: - case tok_day: - case tok_abmon: - case tok_mon: - case tok_am_pm: - case tok_alt_digits: - case tok_era: - READ_STRING_LIST (time_add, bad_time); - continue; - - case tok_d_t_fmt: - case tok_d_fmt: - case tok_t_fmt: - case tok_t_fmt_ampm: - case tok_era_year: - case tok_era_d_t_fmt: - case tok_era_d_fmt: - case tok_era_t_fmt: - READ_STRING (time_add, bad_time); - break; - - default: - bad_time: - SYNTAX_ERROR (_("syntax error in time locale definition")); - } + case tok_lc_measurement: + measurement_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MEASUREMENT_LOCALE) == 0); + result->avail |= locale_mask & MEASUREMENT_LOCALE; continue; - case 50: - HANDLE_COPY 
(LC_MESSAGES, tok_lc_messages, "LC_MESSAGES"); - - messages_startup (ldfile, result, charset); - /* FALLTHROUGH */ - - case 51: - LOCALE_PROLOG (tok_lc_messages, "LC_MESSAGES"); - - switch (nowtok) - { - case tok_yesexpr: - case tok_noexpr: - case tok_yesstr: - case tok_nostr: - READ_STRING (messages_add, bad_message); - break; - - default: - bad_message: - SYNTAX_ERROR (_("syntax error in message locale definition")); - } + case tok_lc_identification: + identification_read (ldfile, result, charmap, repertoire_name, + (locale_mask & IDENTIFICATION_LOCALE) == 0); + result->avail |= locale_mask & IDENTIFICATION_LOCALE; continue; default: - error (5, 0, _("%s: error in state machine"), __FILE__); - /* NOTREACHED */ + SYNTAX_ERROR (_("\ +syntax error: not inside a locale definition section")); + continue; } - break; + /* The rest of the line must be empty. */ + lr_ignore_rest (ldfile, 1); } /* We read all of the file. */ lr_close (ldfile); - /* Let's see what information is available. */ - for (cnt = LC_CTYPE; cnt <= LC_MESSAGES; ++cnt) - if (result->categories[cnt].generic != NULL) - result->avail |= 1 << cnt; - - return result; + return 0; } +static void (*const check_funcs[]) (struct localedef_t *, + struct charmap_t *) = +{ + [LC_CTYPE] = ctype_finish, + [LC_COLLATE] = collate_finish, + [LC_MESSAGES] = messages_finish, + [LC_MONETARY] = monetary_finish, + [LC_NUMERIC] = numeric_finish, + [LC_TIME] = time_finish, + [LC_PAPER] = paper_finish, + [LC_NAME] = name_finish, + [LC_ADDRESS] = address_finish, + [LC_TELEPHONE] = telephone_finish, + [LC_MEASUREMENT] = measurement_finish, + [LC_IDENTIFICATION] = identification_finish +}; + + void -check_all_categories (struct localedef_t *locale, struct charset_t *charset) +check_all_categories (struct localedef_t *definitions, + struct charmap_t *charmap) { - /* Call the finishing functions for all locales. 
*/ - if ((locale->avail & (1 << LC_CTYPE)) != 0 - && (locale->binary & (1 << LC_CTYPE)) == 0) - ctype_finish (locale, charset); - if ((locale->avail & (1 << LC_COLLATE)) != 0 - && (locale->binary & (1 << LC_COLLATE)) == 0) - collate_finish (locale, charset); - if ((locale->avail & (1 << LC_MONETARY)) != 0 - && (locale->binary & (1 << LC_MONETARY)) == 0) - monetary_finish (locale); - if ((locale->avail & (1 << LC_NUMERIC)) != 0 - && (locale->binary & (1 << LC_NUMERIC)) == 0) - numeric_finish (locale); - if ((locale->avail & (1 << LC_TIME)) != 0 - && (locale->binary & (1 << LC_TIME)) == 0) - time_finish (locale); - if ((locale->avail & (1 << LC_MESSAGES)) != 0 - && (locale->binary & (1 << LC_MESSAGES)) == 0) - messages_finish (locale); + int cnt; + + for (cnt = 0; cnt < sizeof (check_funcs) / sizeof (check_funcs[0]); ++cnt) + if (check_funcs[cnt] != NULL) + check_funcs[cnt] (definitions, charmap); } +static void (*const write_funcs[]) (struct localedef_t *, struct charmap_t *, + const char *) = +{ + [LC_CTYPE] = ctype_output, + [LC_COLLATE] = collate_output, + [LC_MESSAGES] = messages_output, + [LC_MONETARY] = monetary_output, + [LC_NUMERIC] = numeric_output, + [LC_TIME] = time_output, + [LC_PAPER] = paper_output, + [LC_NAME] = name_output, + [LC_ADDRESS] = address_output, + [LC_TELEPHONE] = telephone_output, + [LC_MEASUREMENT] = measurement_output, + [LC_IDENTIFICATION] = identification_output +}; + + void -write_all_categories (struct localedef_t *locale, struct charset_t *charset, +write_all_categories (struct localedef_t *definitions, + struct charmap_t *charmap, const char *output_path) { - /* Call all functions to write locale data. 
*/ - if ((locale->avail & (1 << LC_CTYPE)) != 0) - ctype_output (locale, charset, output_path); - if ((locale->avail & (1 << LC_COLLATE)) != 0) - collate_output (locale, charset, output_path); - if ((locale->avail & (1 << LC_MONETARY)) != 0) - monetary_output (locale, output_path); - if ((locale->avail & (1 << LC_NUMERIC)) != 0) - numeric_output (locale, output_path); - if ((locale->avail & (1 << LC_TIME)) != 0) - time_output (locale, output_path); - if ((locale->avail & (1 << LC_MESSAGES)) != 0) - messages_output (locale, output_path); + int cnt; + + for (cnt = 0; cnt < sizeof (write_funcs) / sizeof (write_funcs[0]); ++cnt) + if (check_funcs[cnt] != NULL) + write_funcs[cnt] (definitions, charmap, output_path); } @@ -1086,7 +320,7 @@ write_locale_data (const char *output_path, const char *category, But for LC_MESSAGES we have to take care for the translation data. This means we need to have a directory LC_MESSAGES in which we place the file under the name SYS_LC_MESSAGES. */ - sprintf (fname, "%s%s", output_path, category); + sprintf (fname, "%s/%s", output_path, category); if (strcmp (category, "LC_MESSAGES") == 0) { struct stat st; @@ -1118,7 +352,7 @@ write_locale_data (const char *output_path, const char *category, if (errno == EISDIR) { - sprintf (fname, "%1$s%2$s/SYS_%2$s", output_path, category); + sprintf (fname, "%1$s/%2$s/SYS_%2$s", output_path, category); fd = creat (fname, 0666); if (fd == -1) save_err = errno; diff --git a/locale/programs/locfile.h b/locale/programs/locfile.h index 697af64..6f67039 100644 --- a/locale/programs/locfile.h +++ b/locale/programs/locfile.h @@ -1,6 +1,6 @@ -/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -18,61 +18,241 @@ Boston, MA 02111-1307, USA. */ #ifndef _LOCFILE_H -#define _LOCFILE_H +#define _LOCFILE_H 1 #include <sys/uio.h> -#include "charset.h" +#include "linereader.h" +#include "localedef.h" -/* Opaque types for the different locales. */ -struct locale_ctype_t; -struct locale_collate_t; -struct locale_monetary_t; -struct locale_numeric_t; -struct locale_time_t; -struct locale_messages_t; -struct localedef_t +/* Header of the locale data files. */ +struct locale_file { - int failed; - - int avail; - int binary; - - union - { - void *generic; - struct locale_ctype_t *ctype; - struct locale_collate_t *collate; - struct locale_monetary_t *monetary; - struct locale_numeric_t *numeric; - struct locale_time_t *time; - struct locale_messages_t *messages; - } categories[6]; - - size_t len[6]; + int magic; + int n; }; -/* Declared in localedef.c. */ -extern int be_quiet; -extern const char *repertoiremap; -/* Found in localedef.c. */ -void def_to_process (const char *name, int category); +/* Macros used in the parser. */ +#define SYNTAX_ERROR(string, args...) \ + do \ + { \ + lr_error (ldfile, string, ## args); \ + lr_ignore_rest (ldfile, 0); \ + } \ + while (0) + + +/* General handling of `copy'. 
*/ +static inline void +handle_copy (struct linereader *ldfile, struct charmap_t *charmap, + struct repertoire_t *repertoire, enum token_t token, int locale, + const char *locale_name, int ignore_content) +{ + struct token *now; + int warned = 0; + + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_string) + lr_error (ldfile, _("expect string argument for `copy'")); + else if (!ignore_content) + { + if (now->val.str.startmb == NULL) + lr_error (ldfile, _("\ +locale name should consist only of portable characters")); + else + (void) add_to_readlist (locale, now->val.str.startmb, + repertoire->name); + } + + lr_ignore_rest (ldfile, now->tok == tok_string); + + /* The rest of the line must be empty and the next keyword must be + `END xxx'. */ + while (lr_token (ldfile, charmap, NULL)->tok != tok_end) + { + if (warned == 0) + { + lr_error (ldfile, _("\ +no other keyword shall be specified when `copy' is used")); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + } + + /* Handle `END xxx'. */ + if (now->tok != token) + lr_error (ldfile, _("\ +`%1$s' definition does not end with `END %1$s'"), locale_name); + + lr_ignore_rest (ldfile, now->tok == token); +} /* Found in locfile.c. */ -struct localedef_t *locfile_read (const char *filename, - struct charset_t *charset); +extern int locfile_read (struct localedef_t *result, + struct charmap_t *charmap); + +/* Check validity of all the locale data. */ +extern void check_all_categories (struct localedef_t *definitions, + struct charmap_t *charmap); + +/* Write out all locale categories. */ +extern void write_all_categories (struct localedef_t *definitions, + struct charmap_t *charmap, + const char *output_path); + +/* Write out the data. */ +extern void write_locale_data (const char *output_path, const char *category, + size_t n_elem, struct iovec *vec); + + +/* Entrypoints for the parsers of the individual categories. */ + +/* Handle LC_CTYPE category. 
*/ +extern void ctype_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void ctype_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void ctype_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_COLLATE category. */ +extern void collate_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void collate_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void collate_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_MONETARY category. */ +extern void monetary_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void monetary_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void monetary_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_NUMERIC category. */ +extern void numeric_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void numeric_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void numeric_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_MESSAGES category. 
*/ +extern void messages_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void messages_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void messages_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_TIME category. */ +extern void time_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void time_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void time_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_PAPER category. */ +extern void paper_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void paper_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void paper_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_NAME category. */ +extern void name_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void name_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void name_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); -void check_all_categories (struct localedef_t *locale, - struct charset_t *charset); +/* Handle LC_ADDRESS category. 
*/ +extern void address_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void address_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void address_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); -void write_all_categories (struct localedef_t *locale, - struct charset_t *charset, const char *output_path); +/* Handle LC_TELEPHONE category. */ +extern void telephone_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void telephone_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void telephone_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); +/* Handle LC_MEASUREMENT category. */ +extern void measurement_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void measurement_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void measurement_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); -void write_locale_data (const char *output_path, const char *category, - size_t n_elem, struct iovec *vec); +/* Handle LC_IDENTIFICATION category. 
*/ +extern void identification_read (struct linereader *ldfile, + struct localedef_t *result, + struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void identification_finish (struct localedef_t *locale, + struct charmap_t *charmap); +extern void identification_output (struct localedef_t *locale, + struct charmap_t *charmap, + const char *output_path); #endif /* locfile.h */ diff --git a/locale/programs/repertoire.c b/locale/programs/repertoire.c index a03021f..aabe201 100644 --- a/locale/programs/repertoire.c +++ b/locale/programs/repertoire.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1998 Free Software Foundation, Inc. +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -24,22 +24,30 @@ #include <errno.h> #include <error.h> #include <limits.h> +#include <obstack.h> +#include <search.h> #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <libintl.h> #include "linereader.h" -#include "charset.h" +#include "charmap.h" #include "repertoire.h" #include "simple-hash.h" - - -extern void *xmalloc (size_t __n); +#include "localedef.h" /* Simple keyword hashing for the repertoiremap. */ -static const struct keyword_t *repertoiremap_hash (const char *str, int len); +static const struct keyword_t *repertoiremap_hash (const char *str, + unsigned int len); +static void repertoire_new_char (struct linereader *lr, hash_table *ht, + hash_table *rt, struct obstack *ob, + uint32_t value, const char *from, + const char *to, int decimal_ellipsis); +static int repertoire_compare (const void *p1, const void *p2); + +/* Already known repertoire maps. 
*/ +static void *known; struct repertoire_t * @@ -47,9 +55,17 @@ repertoire_read (const char *filename) { struct linereader *repfile; struct repertoire_t *result; + struct repertoire_t **resultp; + struct repertoire_t search; int state; char *from_name = NULL; char *to_name = NULL; + enum token_t ellipsis = tok_none; + + search.name = filename; + resultp = tfind (&search, &known, &repertoire_compare); + if (resultp != NULL) + return *resultp; /* Determine path. */ repfile = lr_open (filename, repertoiremap_hash); @@ -57,7 +73,7 @@ repertoire_read (const char *filename) { if (strchr (filename, '/') == NULL) { - char *i18npath = __secure_getenv ("I18NPATH"); + char *i18npath = getenv ("I18NPATH"); if (i18npath != NULL && *i18npath != '\0') { char path[strlen (filename) + 1 + strlen (i18npath) @@ -73,6 +89,13 @@ repertoire_read (const char *filename) filename); repfile = lr_open (path, repertoiremap_hash); + + if (repfile == NULL) + { + stpcpy (stpcpy (path, next), filename); + + repfile = lr_open (path, repertoiremap_hash); + } } } @@ -98,15 +121,22 @@ repertoire_read (const char *filename) } } + /* We don't want symbolic names in string to be translated. */ + repfile->translate_strings = 0; + /* Allocate room for result. */ result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t)); memset (result, '\0', sizeof (struct repertoire_t)); + result->name = xstrdup (filename); + #define obstack_chunk_alloc malloc #define obstack_chunk_free free obstack_init (&result->mem_pool); - if (init_hash (&result->char_table, 256)) + if (init_hash (&result->char_table, 256) + || init_hash (&result->reverse_table, 256) + || init_hash (&result->seq_table, 256)) { free (result); return NULL; @@ -118,7 +148,7 @@ repertoire_read (const char *filename) while (1) { /* What's on? 
*/ - struct token *now = lr_token (repfile, NULL); + struct token *now = lr_token (repfile, NULL, NULL); enum token_t nowtok = now->tok; struct token *arg; @@ -137,7 +167,7 @@ repertoire_read (const char *filename) if (nowtok == tok_escape_char || nowtok == tok_comment_char) { /* We know that we need an argument. */ - arg = lr_token (repfile, NULL); + arg = lr_token (repfile, NULL, NULL); if (arg->tok != tok_ident) { @@ -148,7 +178,7 @@ repertoire_read (const char *filename) continue; } - if (arg->val.str.len != 1) + if (arg->val.str.lenmb != 1) { lr_error (repfile, _("\ argument to <%s> must be a single character"), @@ -160,9 +190,9 @@ argument to <%s> must be a single character"), } if (nowtok == tok_escape_char) - repfile->escape_char = *arg->val.str.start; + repfile->escape_char = *arg->val.str.startmb; else - repfile->comment_char = *arg->val.str.start; + repfile->comment_char = *arg->val.str.startmb; lr_ignore_rest (repfile, 1); continue; @@ -209,8 +239,8 @@ argument to <%s> must be a single character"), obstack_free (&result->mem_pool, from_name); from_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); to_name = NULL; state = 3; @@ -219,8 +249,10 @@ argument to <%s> must be a single character"), case 3: /* We have two possibilities: We can see an ellipsis or an encoding value. */ - if (nowtok == tok_ellipsis) + if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 + || nowtok == tok_ellipsis2) { + ellipsis = nowtok; state = 4; continue; } @@ -232,7 +264,7 @@ argument to <%s> must be a single character"), state = 2; errno = 0; - if (nowtok != tok_ucs2 && nowtok != tok_ucs4) + if (nowtok != tok_ucs4) { lr_error (repfile, _("syntax error in repertoire map definition: %s"), @@ -243,8 +275,10 @@ argument to <%s> must be a single character"), } /* We've found a new valid definition. 
*/ - charset_new_char (repfile, &result->char_table, 4, - now->val.charcode.val, from_name, to_name); + repertoire_new_char (repfile, &result->char_table, + &result->reverse_table, &result->mem_pool, + now->val.ucs4, from_name, to_name, + ellipsis != tok_ellipsis2); /* Ignore the rest of the line. */ lr_ignore_rest (repfile, 0); @@ -268,8 +302,8 @@ argument to <%s> must be a single character"), /* Copy the to-name in a safe place. */ to_name = (char *) obstack_copy0 (&result->mem_pool, - repfile->token.val.str.start, - repfile->token.val.str.len); + repfile->token.val.str.startmb, + repfile->token.val.str.lenmb); state = 5; continue; @@ -291,12 +325,26 @@ argument to <%s> must be a single character"), lr_close (repfile); + if (tsearch (result, &known, &repertoire_compare) == NULL) + /* Something went wrong. */ + error (0, errno, _("cannot safe new repertoire map")); + return result; } +static int +repertoire_compare (const void *p1, const void *p2) +{ + struct repertoire_t *r1 = (struct repertoire_t *) p1; + struct repertoire_t *r2 = (struct repertoire_t *) p2; + + return strcmp (r1->name, r2->name); +} + + static const struct keyword_t * -repertoiremap_hash (const char *str, int len) +repertoiremap_hash (const char *str, unsigned int len) { static const struct keyword_t wordlist[0] = { @@ -317,3 +365,142 @@ repertoiremap_hash (const char *str, int len) return NULL; } + + +/* Enter symbol FROM with UCS4 value VALUE into the name table HT and the + reverse table RT. If TO is non-NULL, FROM and TO delimit a range of + names with a common prefix and a numeric suffix (decimal when + DECIMAL_ELLIPSIS is nonzero, upper-case hexadecimal otherwise); every + name in the range is entered with consecutive values starting at VALUE. */ +static void +repertoire_new_char (struct linereader *lr, hash_table *ht, hash_table *rt, + struct obstack *ob, uint32_t value, const char *from, + const char *to, int decimal_ellipsis) +{ + char *from_end; + char *to_end; + const char *cp; + char *buf = NULL; + int prefix_len, len1, len2; + unsigned long int from_nr, to_nr, cnt; + + if (to == NULL) + { + insert_entry (ht, from, strlen (from), + (void *) (unsigned long int) value); + /* Please note that it isn't a bug if a symbol is defined more + than once. All later definitions are simply discarded. 
*/ + + insert_entry (rt, obstack_copy (ob, &value, sizeof (value)), + sizeof (value), (void *) from); + + return; + } + + /* We have a range: the names must have equal prefixes + and an equal number of digits, where the second number is greater + than or equal to the first. */ + len1 = strlen (from); + len2 = strlen (to); + + if (len1 != len2) + { + invalid_range: + lr_error (lr, _("invalid names for character range")); + return; + } + + cp = &from[len1 - 1]; + if (decimal_ellipsis) + while (cp >= from && isdigit ((unsigned char) *cp)) + --cp; + else + while (cp >= from && isxdigit ((unsigned char) *cp)) + { + if (!isdigit ((unsigned char) *cp) && !isupper ((unsigned char) *cp)) + lr_error (lr, _("\ +hexadecimal range format should use only capital characters")); + --cp; + } + + prefix_len = (cp - from) + 1; + + if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) + goto invalid_range; + + errno = 0; + from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16); + if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE) + || ((to_nr = strtoul (&to[prefix_len], &to_end, + decimal_ellipsis ? 10 : 16)) == ULONG_MAX + && errno == ERANGE) + || *to_end != '\0') + { + lr_error (lr, _("<%s> and <%s> are invalid names for range"), from, to); + return; + } + + if (from_nr > to_nr) + { + lr_error (lr, _("upper limit in range is smaller than lower limit")); + return; + } + + for (cnt = from_nr; cnt <= to_nr; ++cnt) + { + uint32_t this_value = value + (cnt - from_nr); + + obstack_printf (ob, decimal_ellipsis ? "%.*s%0*lu" : "%.*s%0*lX", + prefix_len, from, len1 - prefix_len, cnt); + /* Terminate and finish the name before the obstack is used again. */ + obstack_1grow (ob, '\0'); + buf = obstack_finish (ob); + + insert_entry (ht, buf, len1, + (void *) (unsigned long int) this_value); + /* Please note we don't examine the return value since it is no error + if we have two definitions for a symbol. 
*/ + + insert_entry (rt, obstack_copy (ob, &this_value, sizeof (this_value)), + sizeof (this_value), (void *) buf); + } +} + + +uint32_t +repertoire_find_value (const struct repertoire_t *rep, const char *name, + size_t len) +{ + void *result; + + if (find_entry ((hash_table *) &rep->char_table, name, len, &result) < 0) + return ILLEGAL_CHAR_VALUE; + + return (uint32_t) ((unsigned long int) result); +} + + +const char * +repertoire_find_symbol (const struct repertoire_t *rep, uint32_t ucs) +{ + void *result; + + if (find_entry ((hash_table *) &rep->reverse_table, &ucs, sizeof (ucs), + &result) < 0) + return NULL; + + return (const char *) result; +} + + +struct charseq * +repertoire_find_seq (const struct repertoire_t *rep, uint32_t ucs) +{ + void *result; + + if (find_entry ((hash_table *) &rep->seq_table, &ucs, sizeof (ucs), + &result) < 0) + return NULL; + + return (struct charseq *) result; +} diff --git a/locale/programs/repertoire.h b/locale/programs/repertoire.h index 7befeb4..ef80369 100644 --- a/locale/programs/repertoire.h +++ b/locale/programs/repertoire.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1998 Free Software Foundation, Inc. +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -21,18 +21,43 @@ #define _REPERTOIREMAP_H 1 #include <obstack.h> +#include <stdint.h> +#include "charmap.h" #include "simple-hash.h" -#include "linereader.h" struct repertoire_t { + const char *name; struct obstack mem_pool; hash_table char_table; + hash_table reverse_table; + hash_table seq_table; }; +/* We need one value to mark the error case. Let's use 0xffffffff. + I.e., it is placed in the last page of ISO 10646. For now only the + first is used and we have plenty of room. */ +#define ILLEGAL_CHAR_VALUE ((uint32_t) 0xffffffffu) + +/* Another value is needed to signal that a value is not yet determined. 
*/ +#define UNINITIALIZED_CHAR_VALUE ((uint32_t) 0xfffffffeu) + + /* Prototypes for repertoire map handling functions. */ -struct repertoire_t *repertoire_read (const char *filename); +extern struct repertoire_t *repertoire_read (const char *filename); + +/* Return UCS4 value of character with given NAME. */ +extern uint32_t repertoire_find_value (const struct repertoire_t *repertoire, + const char *name, size_t len); + +/* Return symbol for given UCS4 value. */ +extern const char *repertoire_find_symbol (const struct repertoire_t *repertoire, + uint32_t ucs); + +/* Query the hash table to memoize mapping from UCS4 to byte sequences. */ +extern struct charseq *repertoire_find_seq (const struct repertoire_t *rep, + uint32_t ucs); #endif /* repertoiremap.h */ diff --git a/locale/programs/simple-hash.h b/locale/programs/simple-hash.h index f26790b..b72e3ac 100644 --- a/locale/programs/simple-hash.h +++ b/locale/programs/simple-hash.h @@ -1,6 +1,6 @@ -/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -24,8 +24,8 @@ typedef struct hash_table { - unsigned long size; - unsigned long filled; + unsigned long int size; + unsigned long int filled; void *first; void *table; struct obstack mem_pool; @@ -33,18 +33,18 @@ typedef struct hash_table hash_table; -int init_hash __P ((hash_table *htab, unsigned long int init_size)); -int delete_hash __P ((hash_table *htab)); -int insert_entry __P ((hash_table *htab, const void *key, size_t keylen, - void *data)); -int find_entry __P ((hash_table *htab, const void *key, size_t keylen, - void **result)); -int set_entry __P ((hash_table *htab, const void *key, size_t keylen, - void *newval)); +extern int init_hash __P ((hash_table *htab, unsigned long int init_size)); +extern int delete_hash __P ((hash_table *htab)); +extern int insert_entry __P ((hash_table *htab, const void *key, size_t keylen, + void *data)); +extern int find_entry __P ((hash_table *htab, const void *key, size_t keylen, + void **result)); +extern int set_entry __P ((hash_table *htab, const void *key, size_t keylen, + void *newval)); -int iterate_table __P ((hash_table *htab, void **ptr, - const void **key, size_t *keylen, void **data)); +extern int iterate_table __P ((hash_table *htab, void **ptr, + const void **key, size_t *keylen, void **data)); -unsigned long next_prime __P ((unsigned long int seed)); +extern unsigned long int next_prime __P ((unsigned long int seed)); #endif /* simple-hash.h */ diff --git a/locale/programs/stringtrans.c b/locale/programs/stringtrans.c index 17f9670..b810129 100644 --- a/locale/programs/stringtrans.c +++ b/locale/programs/stringtrans.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. 
@@ -21,6 +21,7 @@ # include <config.h> #endif +#include <assert.h> #include <stdlib.h> #include "charset.h" @@ -77,8 +78,11 @@ translate_string (char *str, struct charset_t *charset) tp = &str[1]; while (tp[0] != '\0' && tp[0] != '>') - if (tp[0] == '\\' && tp[1] != '\0') - tp += 2; + if (tp[0] == '\\') + if (tp[1] != '\0') + tp += 2; + else + ++tp; else ++tp; |