aboutsummaryrefslogtreecommitdiff
path: root/locale/charmap.c
diff options
context:
space:
mode:
authorRoland McGrath <roland@gnu.org>1996-03-28 08:30:38 +0000
committerRoland McGrath <roland@gnu.org>1996-03-28 08:30:38 +0000
commit19bc17a90548ee427035994bbc4b14395723ff1f (patch)
treee7a17eda196c2610ca4be26c9e7985815162eafb /locale/charmap.c
parent53f770e0f9d405ea8d1888254c6f7ce431b04c6e (diff)
downloadglibc-19bc17a90548ee427035994bbc4b14395723ff1f.zip
glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.gz
glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.bz2
Thu Mar 28 03:25:10 1996 Roland McGrath <roland@charlie-brown.gnu.ai.mit.edu>
* intl/Makefile (copysrc): Add missing > in sed cmd. Sat Mar 23 17:52:49 1996 Ulrich Drepper <drepper@gnu.ai.mit.edu> * Makeconfig: Rename Makefile variable nlsdir to i18ndir and change value to $(datadir)/i18n. `nls' is not an appropriate name. * Makefile (subdirs): Add new subdir wctype. * ctype/ctype-info.c: Add new global variable __ctype_names and initialize from _nl_C_LC_CTYPE. * ctype/ctype.h: In P1003.3b/D11 `alnum' is a separate character class. Use bit 11. [_ISbit]: Protect definition of bitmasks because they are also used in wctype.h. * libio/genops.c (_IO_sputbackc, _IO_sungetc): Clear EOF flag after successfully pushing back a character. Fundamental changes in locale implementation. Almost nothing from the old code is used anymore. * locale/charmap.c, locale/collate.c, locale/config.h, locale/ctypedump.c, locale/hash.h, locale/keyword.gperf, locale/keyword.h, locale/loadlocale.c, locale/locale-ctype.c, locale/locale.c locale/localeconv.c, locale/localedef.c, locale/localedef.h, locale/locfile-hash.c, locale/locfile-lex.c, locale/locfile-parse.c, locale/messages.c, locale/monetary.c, locale/numeric.c, locale/setlocale.c, locale/token.h, locale/xmalloc.c: Removed. * locale/Makefile: Update for new locale implementation with program source code distributed in subdir. * locale/categories.def, locale/iso-4217.def: Updated file for new locale implementation. * locale/langinfo.h: Updated for new locale implementation. (ERA_D_T_FMT, ERA_T_FMT): New official values according to P1003.2b/D11. (_NL_COLLATE_NRULES, _NL_COLLATE_RULES, _NL_COLLATE_HASH_SIZE, _NL_COLLATE_HASH_LAYERS, _NL_COLLATE_TABLE_EB, _NL_COLLATE_TABLE_EL, _NL_COLLATE_UNDEFINED, _NL_COLLATE_EXTRA_EB, _NL_COLLATE_EXTRA_EL, _NL_CTYPE_NAMES_EB, _NL_CTYPE_NAMES_EL, _NL_CTYPE_HASH_SIZE, _NL_CTYPE_HASH_LAYERS, _NL_CTYPE_CLASS_NAMES, _NL_CTYPE_MAP_NAMES, _NL_CTYPE_WIDTH): New internal values for extended LC_CTYPE and LC_COLLATE implementation. 
* locale/simple-hash.c, locale/simple-hash.h, locale/xmalloc.c, locale/xstrdup.c: Helper functions for locale related programs. * locale/C-collate.c, locale/C-ctype.c, locale/C-messages.c, locale/C-monetary.c, locale/C-numeric.c, locale/C-time.c, locale/lc-collate.c, locale/lc-ctype.c, locale/lc-messages.c, locale/lc-monetary.c, locale/lc-numeric.c, locale/lc-time.c: New implementation of locale functions, and new generated "C" locale data. * locale/loadlocale.c: Now handles word fields in locale binary automatically by changing the endianness if necessary. * locale/localeinfo.h (LIMAGIC): Changed magic number because of incompatible changes. (locale_data): Changed definition to allow word as a value type. (coll_sort_rule): Values for collation sorting mode. (_NL_CURRENT_WORD): New macro to access word value of locale entry. (__collate_table, __collate_extra): Declare new global variables for collation tables. * locale/programs/charmap-kw.gperf, locale/programs/charmap-kw.h, locale/programs/charmap.c, locale/programs/charset.c, locale/programs/charset.h, locale/programs/config.h, locale/programs/ctypedump.c, locale/programs/ld-collate.c, locale/programs/ld-ctype.c, locale/programs/ld-messages.c, locale/programs/ld-monetary.c, locale/programs/ld-numeric.c, locale/programs/ld-time.c, locale/programs/linereader.c, locale/programs/linereader.h, locale/programs/locale.c, locale/programs/localedef.c, locale/programs/locales.h, locale/programs/locfile-kw.gperf, locale/programs/locfile-kw.h, locale/programs/locfile-token.h, locale/programs/locfile.c, locale/programs/locfile.h, locale/programs/stringtrans.c, locale/programs/stringtrans.h: Implementation of locale related programs. * locale/weight.h: Functions to access collation tables. * posix/unistd.h: Define _POSIX2_LOCALEDEF. * stdio-common/printf_fp.c: Fix bug with printing certain numbers < 10^-1. Reported by Bill Metzenthen. * stdio-common/tfformat.c: Add new test for above bug. 
* string/strcoll.c, string/strxfrm.c: Real implementation of string collation according to ISO C. * wctype/Makefile, wctype/cname-lookup.h, wctype/iswctype.c, wctype/test_wctype.c, wctype/towctrans.c, wctype/wcfuncs.c, wctype/wctrans.c, wctype/wctype.c, wctype/wctype.h: New files. Implementation of wide character classes and mapping.
Diffstat (limited to 'locale/charmap.c')
-rw-r--r--locale/charmap.c524
1 files changed, 0 insertions, 524 deletions
diff --git a/locale/charmap.c b/locale/charmap.c
deleted file mode 100644
index ad1075e..0000000
--- a/locale/charmap.c
+++ /dev/null
@@ -1,524 +0,0 @@
-/* Copyright (C) 1995 Free Software Foundation, Inc.
-
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
-
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB. If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA. */
-
-#include <ctype.h>
-#include <errno.h>
-#include <libintl.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "localedef.h"
-#include "hash.h"
-
-/* Charmap database.  It is initialized and filled in by charmap_read ()
-   and consulted by the local parsing functions below. */
-struct charmap charmap_data;
-
-/* Number of the charmap line most recently read; it is incremented after
-   each line is fetched and is printed in diagnostics. */
-static unsigned int line_no;
-
-/* Prototypes for local functions.  read_prolog () handles everything up
-   to the `CHARMAP' line, read_body () the entries after it. */
-static void read_prolog (FILE *infile);
-static unsigned long read_body (FILE *infile);
-
-
-/* Read the complete table of symbolic character names from the charmap
-   file FILENAME and store the result in `charmap_data'.
-
-   A FILENAME that cannot be opened directly and does not start with `/'
-   is retried below CHARMAP_PATH.  If FILENAME is NULL, or neither attempt
-   succeeds, CHARMAP_PATH/DEFAULT_CHARMAP is tried instead.  If that file
-   cannot be opened either, error () is called with status 4 (presumably
-   fatal -- confirm against this project's error ()).
-
-   After parsing, the hash table size and number of layers that minimize
-   size * (longest chain) for a simple modulo hash are computed and stored
-   in charmap_data.hash_size / charmap_data.hash_layers.  */
-void
-charmap_read (const char *filename)
-{
-  unsigned long max_char;
-  long path_max = pathconf (".", _PC_PATH_MAX);
-  /* pathconf () returns -1 on error and also when there is no fixed
-     limit; never let a non-positive value become an array length.  */
-  size_t buf_size = path_max > 0 ? (size_t) path_max : 1024;
-  char buf[buf_size];
-  FILE *infile = NULL;
-
-  /* Initialize charmap data.  */
-  charmap_data.codeset_name = NULL;
-  charmap_data.mb_cur_max = -1;
-  charmap_data.mb_cur_min = -1;
-  charmap_data.escape_char = '\\';
-  charmap_data.comment_char = '#';
-
-  if (filename != NULL)
-    {
-      /* BUF only records the name for diagnostics here; the open itself
-         uses FILENAME, so a truncated copy is harmless.  */
-      snprintf (buf, buf_size, "%s", filename);
-      infile = fopen (filename, "r");
-      if (infile == NULL && filename[0] != '/')
-        {
-          snprintf (buf, buf_size, "%s/%s", CHARMAP_PATH, filename);
-          infile = fopen (buf, "r");
-        }
-    }
-  if (infile == NULL)
-    {
-      if (filename != NULL)
-        error (0, errno, gettext ("input file `%s' not found"), filename);
-
-      snprintf (buf, buf_size, "%s/%s", CHARMAP_PATH, DEFAULT_CHARMAP);
-      infile = fopen (buf, "r");
-
-      /* Report the path that was actually tried.  FILENAME may be NULL
-         at this point and must not be passed to `%s'.  */
-      if (infile == NULL)
-        error (4, errno, gettext ("input file `%s' not found"), buf);
-    }
-
-  /* BUF is an automatic array, so the global must not point at it once
-     this function has returned.  Keep a private heap copy instead; it is
-     intentionally not freed here.  */
-  charmap_data.filename = strcpy ((char *) xmalloc (strlen (buf) + 1), buf);
-  init_hash (&charmap_data.table, 500);
-  line_no = 0;
-
-  /* Read the prolog of the charmap file.  */
-  read_prolog (infile);
-
-  /* Fill in defaults for values the prolog did not specify.  */
-  if (charmap_data.mb_cur_max == -1)
-    charmap_data.mb_cur_max = 1;
-  if (charmap_data.mb_cur_min == -1)
-    charmap_data.mb_cur_min = charmap_data.mb_cur_max;
-
-  /* read_body () packs the bytes of a character into one `long'.  */
-  if ((size_t) charmap_data.mb_cur_max > sizeof (long))
-    {
-      error (2, 0, gettext ("program limitation: for now only upto %Zu "
-                            "bytes per character are allowed"), sizeof (long));
-    }
-
-  /* Now process all entries.  */
-  max_char = read_body (infile);
-
-  /* We don't need the file anymore.  */
-  fclose (infile);
-
-
-  /* Determine the optimal table size when using the simple modulo hashing
-     function.  */
-  if (max_char >= 256)
-    {
-      int size;
-      /* Current best values, initialized to some never reached high value.  */
-      int best_count = 10000;
-      int best_size = 10000;
-      int best_product = best_count * best_size;
-
-      /* Give warning.
-         NOTE(review): status -1 is kept from the original; presumably this
-         project's error () treats it as "warning only" -- confirm.  */
-      error (-1, 0, gettext ("computing character table size: this may take "
-                             "a while"));
-
-      for (size = 256; size <= best_product; ++size)
-        {
-          /* Array with slot counters.  */
-          int cnt[size];
-          /* Value of the current table entry as stored by read_body ().  */
-          void *value = NULL;
-          /* Maximal number of characters in any slot.  */
-          int maxcnt = 0;
-          /* Product of current size and maximal count.  */
-          int product = 0;
-          /* Iteration pointer through hashing table.  */
-          void *ptr = NULL;
-
-          /* Initializes counters to zero.  */
-          memset (cnt, 0, size * sizeof (int));
-
-          /* Iterate through whole hashing table; give up on this size as
-             soon as it cannot beat the best one found so far.  */
-          while (product < best_product
-                 && iterate_table (&charmap_data.table, &ptr, &value))
-            {
-              /* The entry is a character value cast to `void *'.  Receive
-                 it in a real pointer object and reduce it as an unsigned
-                 number so the slot index can never be negative.  */
-              unsigned long ch = (unsigned long) value;
-              int slot = (int) (ch % (unsigned long) size);
-
-              /* Increment slot counter and test for current maximum.  */
-              if (++cnt[slot] > maxcnt)
-                {
-                  maxcnt = cnt[slot];
-                  product = maxcnt * size;
-                }
-            }
-
-          if (product < best_product)
-            {
-              best_count = maxcnt;
-              best_size = size;
-              best_product = best_count * best_size;
-            }
-        }
-
-      charmap_data.hash_size = best_size;
-      charmap_data.hash_layers = best_count;
-    }
-  else
-    {
-      charmap_data.hash_size = 256;
-      charmap_data.hash_layers = 1;
-    }
-}
-
-
-/* Report a syntax error for the current line of the charmap file (using
-   charmap_data.filename and line_no) and jump to the label `end_of_loop'.
-   Every function that uses this macro must therefore define that label. */
-#define SYNTAX_ERROR \
- do { error (0, 0, gettext ("%s:%u: syntax error in charmap file"), \
- charmap_data.filename, line_no); \
- goto end_of_loop; } while (0)
-
-/* Read the prolog of the charmap file INFILE until the line containing
-   `CHARMAP' and store the settings found in `charmap_data'.
-
-   Recognized prolog lines are <code_set_name>, <mb_cur_max>, <mb_cur_min>,
-   <escape_char> and <comment_char>, each followed by its argument.  Empty
-   lines and lines starting with the current comment character are
-   skipped.  Anything else is reported as a syntax error and ignored.
-
-   Reaching end of file (or a line without terminating newline) before
-   `CHARMAP' is reported through error () with status 4.  */
-static void
-read_prolog (FILE *infile)
-{
-  long line_max = sysconf (_SC_LINE_MAX);
-  /* sysconf () returns -1 when the limit is unknown; fall back to the
-     POSIX.2 minimum for LINE_MAX rather than creating a bogus array.  */
-  size_t bufsize = line_max > 0 ? (size_t) line_max : 2048;
-  char buf[bufsize];
-
-/* Advance CP by NO characters (the length of the keyword just matched),
-   skip white space, and require that the argument starts with a character
-   accepted by PRED.  Otherwise report a syntax error.  */
-#define cp_to_arg(no,pred) \
-  do \
-    { \
-      cp += no; \
-      while (isspace ((unsigned char) *cp)) \
-        ++cp; \
-      if (*cp == '\0' || !pred ((unsigned char) *cp)) \
-        SYNTAX_ERROR; \
-    } \
-  while (0)
-
-  while (1)
-    {
-      char *cp = buf;
-      /* Must be able to hold any length up to BUFSIZE - 1.  */
-      size_t len;
-
-      /* Read the next line.  On end of file or read error the buffer
-         contents are not usable, so make the line look empty.  */
-      if (fgets (buf, bufsize, infile) == NULL)
-        buf[0] = '\0';
-      len = strlen (buf);
-
-      /* The prolog must be terminated by a `CHARMAP' line; running out
-         of input (or hitting an unterminated line) first is an error.  */
-      if (len == 0 || buf[len - 1] != '\n')
-        {
-          error (4, 0, gettext ("%s: unexpected end of file in charmap"),
-                 charmap_data.filename);
-          /* Only reached if error () returns.  */
-          return;
-        }
-
-      /* This is the next line.  */
-      ++line_no;
-
-      /* Comments and empty lines are ignored.  */
-      if (len == 1 || buf[0] == charmap_data.comment_char)
-        continue;
-
-      buf[len - 1] = '\0';
-
-      /* Throw away leading white spaces.  This is not defined in POSIX.2
-         so don't do it if conformance is requested.  */
-      if (!posix_conformance)
-        while (isspace ((unsigned char) *cp))
-          ++cp;
-
-      /* If `CHARMAP' is read the prolog is over.  */
-      if (strncmp (cp, "CHARMAP", 7) == 0
-          && (!posix_conformance || cp[7] == '\0'))
-        return;
-
-      /* Now it can be only one of special symbols defining the charmap
-         parameters.  All are beginning with '<'.  */
-      if (*cp != '<')
-        SYNTAX_ERROR;
-
-      ++cp;
-      if (strncmp (cp, "code_set_name>", 14) == 0)
-        {
-          char *startp;
-          size_t name_len;
-
-          cp_to_arg (14, isgraph);
-
-          if (charmap_data.codeset_name != NULL)
-            {
-              error (0, 0, gettext ("%s:%u: duplicate code set name "
-                                    "specification"),
-                     charmap_data.filename, line_no);
-              free (charmap_data.codeset_name);
-            }
-
-          /* The name is the run of graphic characters at CP.  */
-          startp = cp;
-          while (*cp != '\0' && isgraph ((unsigned char) *cp))
-            ++cp;
-          name_len = (size_t) (cp - startp);
-
-          /* Copy the name into the freshly allocated buffer and terminate
-             it.  */
-          charmap_data.codeset_name = (char *) xmalloc (name_len + 1);
-          memcpy (charmap_data.codeset_name, startp, name_len);
-          charmap_data.codeset_name[name_len] = '\0';
-        }
-      else if (strncmp (cp, "mb_cur_max>", 11) == 0)
-        {
-          int new_val;
-
-          cp_to_arg (11, isdigit);
-
-          if (charmap_data.mb_cur_max != -1)
-            error (0, 0,
-                   gettext ("%s:%u: duplicate definition of mb_cur_max"),
-                   charmap_data.filename, line_no);
-
-          new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
-          if (new_val < 1)
-            error (0, 0, gettext ("%s:%u: illegal value for mb_cur_max: %d"),
-                   charmap_data.filename, line_no, new_val);
-          else
-            charmap_data.mb_cur_max = new_val;
-        }
-      else if (strncmp (cp, "mb_cur_min>", 11) == 0)
-        {
-          int new_val;
-
-          cp_to_arg (11, isdigit);
-
-          /* Check the field this branch sets.  Testing mb_cur_max here
-             would warn whenever <mb_cur_max> came first and would miss a
-             real duplicate.  */
-          if (charmap_data.mb_cur_min != -1)
-            error (0, 0,
-                   gettext ("%s:%u: duplicate definition of mb_cur_min"),
-                   charmap_data.filename, line_no);
-
-          new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
-          if (new_val < 1)
-            error (0, 0, gettext ("%s:%u: illegal value for mb_cur_min: %d"),
-                   charmap_data.filename, line_no, new_val);
-          else
-            charmap_data.mb_cur_min = new_val;
-        }
-      else if (strncmp (cp, "escape_char>", 12) == 0)
-        {
-          cp_to_arg (12, isgraph);
-          charmap_data.escape_char = *cp;
-        }
-      else if (strncmp (cp, "comment_char>", 13) == 0)
-        {
-          cp_to_arg (13, isgraph);
-          charmap_data.comment_char = *cp;
-        }
-      else
-        SYNTAX_ERROR;
-
-      /* A label must be followed by a statement.  */
-    end_of_loop:
-      ;
-    }
-}
-#undef cp_to_arg
-
-
-/* Read the body of the charmap file INFILE, i.e. everything after the
-   `CHARMAP' line up to `END CHARMAP', and enter every symbolic name into
-   charmap_data.table.
-
-   Two entry forms are handled:
-     <name> BYTES                 a single character
-     <nameN>...<nameM> BYTES      a range; the names share a prefix followed
-                                  by decimal numbers of equal width, and the
-                                  character value grows with the number
-   BYTES is a sequence of byte constants, each introduced by the escape
-   character: `d' + decimal, `x' + hexadecimal, otherwise octal.
-
-   Faulty lines are reported and skipped.  Returns the largest character
-   value that was entered.  If the input ends before `END CHARMAP' this is
-   reported and the value collected so far is returned.  */
-static unsigned long
-read_body (FILE *infile)
-{
-  unsigned long max_char = 0;
-  long line_max = sysconf (_SC_LINE_MAX);
-  /* sysconf () returns -1 when the limit is unknown; fall back to the
-     POSIX.2 minimum for LINE_MAX.  */
-  size_t bufsize = line_max > 0 ? (size_t) line_max : 2048;
-  char buf[bufsize];
-  /* A single token can be nearly as long as the whole line, so each
-     sscanf () target must be as large as the line buffer.  */
-  char name_str[bufsize];
-  char code_str[bufsize];
-
-  while (1)
-    {
-      char *cp = buf;
-      size_t len;
-
-      /* Read the next line.  Running out of input here means the
-         terminating `END CHARMAP' line is missing.  */
-      if (fgets (buf, bufsize, infile) == NULL)
-        {
-          error (0, 0, gettext ("%s: `END CHARMAP' is missing"),
-                 charmap_data.filename);
-          return max_char;
-        }
-      len = strlen (buf);
-
-      /* This is the next line.  */
-      ++line_no;
-
-      /* Only possible when the line starts with a NUL byte.  */
-      if (len == 0)
-        SYNTAX_ERROR;
-
-      if (buf[len - 1] == '\n')
-        /* Strip the newline.  */
-        buf[--len] = '\0';
-      else if (len == bufsize - 1)
-        {
-          /* The buffer is full and still holds no newline: the line does
-             not fit.  Complain once and discard the rest of it.  */
-          error (0, 0, gettext ("%s:%u: line too long; use `getconf "
-                                "LINE_MAX' to get the current maximum line "
-                                "length"), charmap_data.filename, line_no);
-          do
-            {
-              if (fgets (buf, bufsize, infile) == NULL)
-                break;
-              len = strlen (buf);
-            }
-          while (len == 0 || buf[len - 1] != '\n');
-          continue;
-        }
-      /* Otherwise this is a final line without newline; process it
-         unchanged.  */
-
-      /* Comments and empty lines are ignored.  */
-      if (len == 0 || buf[0] == charmap_data.comment_char)
-        continue;
-
-      /* Throw away leading white spaces.  This is not defined in POSIX.2
-         so don't do it if conformance is requested.  */
-      if (!posix_conformance)
-        while (isspace ((unsigned char) *cp))
-          ++cp;
-
-      if (*cp == '<')
-        {
-          char *end1p, *end2p, *start2p;
-          size_t cnt = 0;
-          unsigned long char_value = 0;
-
-          /* NAME_STR receives everything after the leading '<' up to the
-             first white space, CODE_STR the byte constants.  */
-          if (sscanf (cp + 1, "%s %s", name_str, code_str) != 2)
-            SYNTAX_ERROR;
-
-          /* Remove escape characters from the first name in place.
-             END1P trails CP and marks the end of the unescaped name.  */
-          end1p = cp = name_str;
-          while (*cp != '\0' && *cp != '>')
-            {
-              if (*cp == charmap_data.escape_char)
-                if (*++cp == '\0')
-                  SYNTAX_ERROR;
-              *end1p++ = *cp++;
-            }
-          if (*cp == '\0')
-            /* No final '>'.  Make error condition.  */
-            end1p = name_str;
-          else
-            ++cp;
-
-          *end1p = '\0';
-
-          if (*cp == '.' && *++cp == '.' && *++cp == '.' && *++cp == '<')
-            {
-              /* This might be the alternate form.  Unescape the second
-                 name in place just like the first one.  */
-              start2p = end2p = ++cp;
-              while (*cp != '\0' && *cp != '>')
-                {
-                  if (*cp == charmap_data.escape_char)
-                    if (*++cp == '\0')
-                      SYNTAX_ERROR;
-                  *end2p++ = *cp++;
-                }
-              if (*cp == '\0')
-                /* No final '>'.  Make error condition.  */
-                end2p = start2p;
-              else
-                {
-                  ++cp;
-                  /* Terminate the name so that strtol () below cannot run
-                     into left-over characters.  END2P is always before
-                     CP here.  */
-                  *end2p = '\0';
-                }
-            }
-          else
-            start2p = end2p = NULL;
-
-
-          /* Reject empty or unterminated names, trailing garbage after
-             the name(s), and encodings not starting with the escape
-             character.  */
-          if (end1p == name_str || (start2p != NULL && start2p == end2p)
-              || *cp != '\0'
-              || *code_str != charmap_data.escape_char)
-            SYNTAX_ERROR;
-
-          /* Collect the bytes of the encoding, most significant first.  */
-          cp = code_str;
-          do
-            {
-              char *begin;
-              long val;
-
-              switch (*++cp)
-                {
-                case 'd':
-                  val = strtol ((begin = cp + 1), &cp, 10);
-                  break;
-                case 'x':
-                  val = strtol ((begin = cp + 1), &cp, 16);
-                  break;
-                default:
-                  val = strtol ((begin = cp), &cp, 8);
-                  break;
-                }
-              if (begin == cp)
-                SYNTAX_ERROR;
-
-              if (posix_conformance && cp - begin < 2)
-                error (0, 0, gettext ("%s:%u: byte constant has less than "
-                                      "two digits"),
-                       charmap_data.filename, line_no);
-
-              if (val < 0 || val > 255)
-                {
-                  error (0, 0, gettext ("%s:%u: character encoding must be "
-                                        "given in 8-bit bytes"),
-                         charmap_data.filename, line_no);
-                  goto end_of_loop;
-                }
-
-              if (cnt < (size_t) charmap_data.mb_cur_max)
-                {
-                  if (cnt < sizeof (long))  /* FIXME */
-                    char_value = (char_value << 8) | (unsigned long) val;
-                }
-              else
-                {
-                  error (0, 0, gettext ("%s:%u: number of bytes in character "
-                                        "definition exceeds `mb_cur_max'"),
-                         charmap_data.filename, line_no);
-                  break;
-                }
-              ++cnt;
-            }
-          while (*cp == charmap_data.escape_char);
-
-          /* Ignore the rest of the line (comment).  */
-          if (end2p == NULL)
-            {
-              if (insert_entry (&charmap_data.table, name_str,
-                                end1p - name_str, (void *) char_value))
-                error (0, 0, gettext ("%s:%u: duplicate entry"),
-                       charmap_data.filename, line_no);
-
-              max_char = MAX (max_char, char_value);
-            }
-          else
-            {
-              char *en1, *en2, *start1p;
-              long n1, n2, n;
-              int width;
-
-              /* Skip the non-numeric prefix both names have in common.  */
-              start1p = name_str;
-              while (start1p < end1p && *start1p == *start2p
-                     && !isdigit ((unsigned char) *start1p))
-                ++start1p, ++start2p;
-
-              n1 = strtol (start1p, &en1, 10);
-              n2 = strtol (start2p, &en2, 10);
-
-              /* Both names must end in numbers of the same, non-zero
-                 width and contain nothing after them.  */
-              if (en1 == start1p || en1 - start1p != en2 - start2p
-                  || en1 != end1p || en2 != end2p)
-                SYNTAX_ERROR;
-
-              if (n1 > n2)
-                error (0, 0, gettext ("%s:%u: starting character is bigger "
-                                      "than last"),
-                       charmap_data.filename, line_no);
-
-              width = (int) (en1 - start1p);
-              for (n = n1; n <= n2; ++n)
-                {
-                  /* Overwrite the numeric suffix of the first name.  The
-                     size includes the terminating NUL, which lands on the
-                     NUL already stored at END1P.  */
-                  snprintf (start1p, (size_t) width + 1, "%0*ld", width, n);
-
-                  if (insert_entry (&charmap_data.table, name_str,
-                                    en1 - name_str,
-                                    (void *) (char_value + n - n1)))
-                    error (0, 0, gettext ("%s:%u: duplicate entry"),
-                           charmap_data.filename, line_no);
-
-                  max_char = MAX (max_char, char_value + n - n1);
-                }
-            }
-        }
-      else
-        {
-          if (strncmp (cp, "END CHARMAP", 11) == 0)
-            return max_char;
-
-          SYNTAX_ERROR;
-        }
-
-      /* A label must be followed by a statement.  */
-    end_of_loop:
-      ;
-    }
-
-  /* Not reached.  */
-  return max_char;
-}
-
-/*
- * Local Variables:
- * mode:c
- * c-basic-offset:2
- * End:
- */