aboutsummaryrefslogtreecommitdiff
path: root/locale/charmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'locale/charmap.c')
-rw-r--r--locale/charmap.c524
1 files changed, 524 insertions, 0 deletions
diff --git a/locale/charmap.c b/locale/charmap.c
new file mode 100644
index 0000000..ad1075e
--- /dev/null
+++ b/locale/charmap.c
@@ -0,0 +1,524 @@
+/* Copyright (C) 1995 Free Software Foundation, Inc.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "localedef.h"
+#include "hash.h"
+
+/* Data structure for representing charmap database. */
+struct charmap charmap_data;
+
+/* Line number in charmap file. */
+static unsigned int line_no;
+
+/* Prototypes for local functions. */
+static void read_prolog (FILE *infile);
+static unsigned long read_body (FILE *infile);
+
+
+/* Read complete table of symbolic names for character set from file. If
+ this file does not exist or is not readable a default file is tried.
+ If this also is not readable no character map is defined. */
+void
+charmap_read (const char *filename)
+{
+ unsigned long max_char;
+ long path_max = pathconf (".", _PC_PATH_MAX);
+ char buf[path_max];
+ FILE *infile = NULL;
+
+ /* Initialize charmap data. */
+ charmap_data.codeset_name = NULL;
+ charmap_data.mb_cur_max = -1;
+ charmap_data.mb_cur_min = -1;
+ charmap_data.escape_char = '\\';
+ charmap_data.comment_char = '#';
+
+ if (filename != NULL)
+ {
+ strcpy (buf, filename);
+ infile = fopen (filename, "r");
+ if (infile == NULL && filename[0] != '/')
+ {
+ snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, filename);
+ infile = fopen (buf, "r");
+ }
+ }
+ if (infile == NULL)
+ {
+ if (filename != NULL)
+ error (0, errno, gettext ("input file `%s' not found"), filename);
+
+ snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, DEFAULT_CHARMAP);
+ infile = fopen (buf, "r");
+
+ if (infile == NULL)
+ error (4, errno, gettext ("input file `%s' not found"), filename);
+ }
+
+ charmap_data.filename = buf;
+ init_hash (&charmap_data.table, 500);
+ line_no = 0;
+
+ /* Read the prolog of the charmap file. */
+ read_prolog (infile);
+
+ /* Last works on the charmap tables global data. */
+ if (charmap_data.mb_cur_max == -1)
+ charmap_data.mb_cur_max = 1;
+ if (charmap_data.mb_cur_min == -1)
+ charmap_data.mb_cur_min = charmap_data.mb_cur_max;
+
+ if ((size_t) charmap_data.mb_cur_max > sizeof (long))
+ {
+ error (2, 0, gettext ("program limitation: for now only upto %Zu "
+ "bytes per character are allowed"), sizeof (long));
+ }
+
+ /* Now process all entries. */
+ max_char = read_body (infile);
+
+ /* We don't need the file anymore. */
+ fclose (infile);
+
+
+ /* Determine the optimal table size when using the simple modulo hashing
+ function. */
+ if (max_char >= 256)
+ {
+ int size;
+ /* Current best values, initialized to some never reached high value. */
+ int best_count = 10000;
+ int best_size = 10000;
+ int best_product = best_count * best_size;
+
+ /* Give warning. */
+ error (-1, 0, gettext ("computing character table size: this may take "
+ "a while"));
+
+ for (size = 256; size <= best_product; ++size)
+ {
+ /* Array with slot counters. */
+ int cnt[size];
+ /* Current character. */
+ int ch;
+ /* Maximal number of characters in any slot. */
+ int maxcnt = 0;
+ /* Product of current size and maximal count. */
+ int product = 0;
+ /* Iteration pointer through hashing table. */
+ char *ptr = NULL;
+
+ /* Initializes counters to zero. */
+ memset(cnt, 0, size * sizeof (int));
+
+ /* Iterate through whole hashing table. */
+ while (product < best_product
+ && iterate_table (&charmap_data.table, (void **) &ptr,
+ (void **) &ch))
+ {
+ /* Increment slot counter. */
+ ++cnt[ch % size];
+ /* Test for current maximum. */
+ if (cnt[ch % size] > maxcnt)
+ {
+ maxcnt = cnt[ch % size];
+ product = maxcnt * size;
+ }
+ }
+
+ if (product < best_product)
+ {
+ best_count = maxcnt;
+ best_size = size;
+ best_product = best_count * best_size;
+ }
+ }
+
+ charmap_data.hash_size = best_size;
+ charmap_data.hash_layers = best_count;
+ }
+ else
+ {
+ charmap_data.hash_size = 256;
+ charmap_data.hash_layers = 1;
+ }
+}
+
+
+#define SYNTAX_ERROR \
+ do { error (0, 0, gettext ("%s:%u: syntax error in charmap file"), \
+ charmap_data.filename, line_no); \
+ goto end_of_loop; } while (0)
+
+/* Read the prolog of the charmap file until the line containing `CHARMAP'.
+ All possible entries are processed. */
+static void
+read_prolog (FILE *infile)
+{
+ size_t bufsize = sysconf (_SC_LINE_MAX);
+ char buf[bufsize];
+
+ while (1)
+ {
+ char *cp = buf;
+ char len;
+
+ /* Read the next line. */
+ fgets (buf, bufsize, infile);
+ len = strlen (buf);
+
+ /* On EOF simply return. */
+ if (len == 0 || buf[len - 1] != '\n')
+ error (4, 0, gettext ("%s: unexpected end of file in charmap"),
+ charmap_data.filename);
+
+ /* This is the next line. */
+ ++line_no;
+
+ /* Comments and empty lines are ignored. */
+ if (len == 1 || buf[0] == charmap_data.comment_char)
+ continue;
+
+ buf[len - 1] = '\0';
+
+ /* Throw away leading white spaces. This is not defined in POSIX.2
+ so don't do it if conformance is requested. */
+ if (!posix_conformance)
+ while (isspace (*cp))
+ ++cp;
+
+ /* If `CHARMAP' is read the prolog is over. */
+ if (strncmp (cp, "CHARMAP", 7) == 0
+ && (!posix_conformance || cp[7] == '\0'))
+ return;
+
+ /* Now it can be only one of special symbols defining the charmap
+ parameters. All are beginning with '<'. */
+ if (*cp != '<')
+ SYNTAX_ERROR;
+
+ ++cp;
+ if (strncmp (cp, "code_set_name>", 14) == 0)
+ {
+ char *startp;
+
+#define cp_to_arg(no,pred) \
+ cp += no; \
+ while (isspace (*cp)) \
+ ++cp; \
+ if (*cp == '\0' || !pred (*cp)) \
+ SYNTAX_ERROR;
+
+ cp_to_arg (14,isgraph)
+
+ if (charmap_data.codeset_name != NULL)
+ {
+ error (0, 0, gettext ("%s:%u: duplicate code set name "
+ "specification"),
+ charmap_data.filename, line_no);
+ free (charmap_data.codeset_name);
+ }
+
+ startp = cp;
+ while (*cp != '\0' && isgraph (*cp) && !isspace (*cp))
+ ++cp;
+
+ charmap_data.codeset_name = (char *) xmalloc (cp - startp + 1);
+ strncpy (startp, startp, cp - startp);
+ }
+ else if (strncmp (cp, "mb_cur_max>", 11) == 0)
+ {
+ int new_val;
+ cp_to_arg (11,isdigit)
+
+ if (charmap_data.mb_cur_max != -1)
+ error (0, 0,
+ gettext ("%s:%u: duplicate definition of mb_cur_max"),
+ charmap_data.filename, line_no);
+
+ new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
+ if (new_val < 1)
+ error (0, 0, gettext ("%s:%u: illegal value for mb_cur_max: %d"),
+ charmap_data.filename, line_no, new_val);
+ else
+ charmap_data.mb_cur_max = new_val;
+ }
+ else if (strncmp (cp, "mb_cur_min>", 11) == 0)
+ {
+ int new_val;
+ cp_to_arg (11,isdigit)
+
+ if (charmap_data.mb_cur_max != -1)
+ error (0, 0,
+ gettext ("%s:%u: duplicate definition of mb_cur_min"),
+ charmap_data.filename, line_no);
+
+ new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
+ if (new_val < 1)
+ error (0, 0, gettext ("%s:%u: illegal value for mb_cur_min: %d"),
+ charmap_data.filename, line_no, new_val);
+ else
+ charmap_data.mb_cur_min = new_val;
+ }
+ else if (strncmp (cp, "escape_char>", 12) == 0)
+ {
+ cp_to_arg (12, isgraph)
+ charmap_data.escape_char = *cp;
+ }
+ else if (strncmp (cp, "comment_char>", 13) == 0)
+ {
+ cp_to_arg (13, isgraph)
+ charmap_data.comment_char = *cp;
+ }
+ else
+ SYNTAX_ERROR;
+ end_of_loop:
+ }
+}
+#undef cp_to_arg
+
+
+static unsigned long
+read_body (FILE *infile)
+{
+ unsigned long max_char = 0;
+ size_t bufsize = sysconf (_SC_LINE_MAX);
+ char buf[bufsize];
+ char name_str[bufsize / 2];
+ char code_str[bufsize / 2];
+
+ while (1)
+ {
+ char *cp = buf;
+ size_t len;
+
+ /* Read the next line. */
+ fgets (buf, bufsize, infile);
+ len = strlen (buf);
+
+ /* On EOF simply return. */
+ if (len == 0)
+ error (0, 0, gettext ("%s: `END CHARMAP' is missing"),
+ charmap_data.filename);
+
+ /* This is the next line. */
+ ++line_no;
+
+ if (len == bufsize - 1)
+ {
+ error (0, 0, gettext ("%s:%u: line too long; use `getconf "
+ "LINE_MAX' to get the current maximum line"
+ "length"), charmap_data.filename, line_no);
+ do
+ {
+ fgets (buf, bufsize, infile);
+ len = strlen (buf);
+ }
+ while (len == bufsize - 1);
+ continue;
+ }
+
+ /* Comments and empty lines are ignored. */
+ if (len == 1 || buf[0] == charmap_data.comment_char)
+ continue;
+
+ buf[len - 1] = '\0';
+
+ /* Throw away leading white spaces. This is not defined in POSIX.2
+ so don't do it if conformance is requested. */
+ if (!posix_conformance)
+ while (isspace (*cp))
+ ++cp;
+
+ if (*cp == '<')
+ {
+ char *end1p, *end2p, *start2p;
+ size_t cnt = 0;
+ unsigned long char_value = 0;
+
+ if (sscanf (cp + 1, "%s %s", name_str, code_str) != 2)
+ SYNTAX_ERROR;
+
+ end1p = cp = name_str;
+ while (*cp != '\0' && *cp != '>')
+ {
+ if (*cp == charmap_data.escape_char)
+ if (*++cp == '\0')
+ SYNTAX_ERROR;
+ *end1p++ = *cp++;
+ }
+ if (*cp == '\0')
+ /* No final '>'. Make error condition. */
+ end1p = name_str;
+ else
+ ++cp;
+
+ *end1p = '\0';
+
+ if (*cp == '.' && *++cp == '.' && *++cp == '.' && *++cp == '<')
+ {
+ /* This might be the alternate form. */
+ start2p = end2p = ++cp;
+ while (*cp != '\0' && *cp != '>')
+ {
+ if (*cp == charmap_data.escape_char)
+ if (*++cp == '\0')
+ SYNTAX_ERROR;
+ *end2p = *cp++;
+ }
+ if (*cp == '\0')
+ /* NO final '>'. Make error condition. */
+ end2p = start2p;
+ else
+ ++cp;
+ }
+ else
+ start2p = end2p = NULL;
+
+
+ if (end1p == name_str || (start2p != NULL && start2p != end2p)
+ || *cp != '\0'
+ || *code_str != charmap_data.escape_char)
+ SYNTAX_ERROR;
+
+ cp = code_str;
+ do
+ {
+ char *begin;
+ long val;
+
+ switch (*++cp)
+ {
+ case 'd':
+ val = strtol ((begin = cp + 1), &cp, 10);
+ break;
+ case 'x':
+ val = strtol ((begin = cp + 1), &cp, 16);
+ break;
+ default:
+ val = strtol ((begin = cp), &cp, 8);
+ break;
+ }
+ if (begin == cp)
+ SYNTAX_ERROR;
+
+ if (posix_conformance && cp - begin < 2)
+ error (0, 0, gettext ("%s:%u: byte constant has less than "
+ "two digits"),
+ charmap_data.filename, line_no);
+
+ if (val < 0 || val > 255)
+ {
+ error (0, 0, gettext ("%s:%u: character encoding must be "
+ "given in 8-bit bytes"),
+ charmap_data.filename, line_no);
+ goto end_of_loop;
+ }
+
+ if (cnt < (size_t) charmap_data.mb_cur_max)
+ {
+ if (cnt < sizeof (long)) /* FIXME */
+ char_value = (char_value << 8) | val;
+ }
+ else
+ {
+ error (0, 0, gettext ("%s:%u: number of bytes in character "
+ "definition exceeds `mb_cur_max'"),
+ charmap_data.filename, line_no);
+ break;
+ }
+ ++cnt;
+ }
+ while (*cp == charmap_data.escape_char);
+
+ /* Ignore the rest of the line (comment). */
+ if (end2p == NULL)
+ {
+ if (insert_entry (&charmap_data.table, name_str,
+ end1p - name_str, (void *) char_value))
+ error (0, 0, gettext ("%s:%u: duplicate entry"),
+ charmap_data.filename, line_no);
+
+ max_char = MAX (max_char, char_value);
+ }
+ else
+ {
+ char *en1, *en2, *start1p;
+ long n1, n2, n;
+
+ start1p = name_str;
+
+ while (*start1p == *start2p && !isdigit (*start1p)
+ && start1p < end1p)
+ ++start1p, ++start2p;
+
+ n1 = strtol (start1p, &en1, 10);
+ n2 = strtol (start2p, &en2, 10);
+
+ if (en1 - start1p != en2 - start2p || en1 != end1p
+ || en2 != end2p)
+ SYNTAX_ERROR;
+
+ if (n1 > n2)
+ error (0, 0, gettext ("%s:%u: starting character is bigger "
+ "than last"),
+ charmap_data.filename, line_no);
+
+ n = n1;
+ while (n <= n2)
+ {
+ snprintf(start1p, en1 - start1p, "%0*d", en1 - start1p, n);
+
+ if (insert_entry (&charmap_data.table, name_str,
+ en1 - name_str,
+ (void *) (char_value + n - n1)))
+ error (0, 0, gettext ("%s:%u: duplicate entry"),
+ charmap_data.filename, line_no);
+
+ max_char = MAX (max_char, char_value + n - n1);
+ ++n;
+ }
+ }
+ }
+ else
+ {
+ if (strncmp (cp, "END CHARMAP", 11) == 0)
+ return max_char;
+
+ SYNTAX_ERROR;
+ }
+ end_of_loop:
+ }
+
+ return max_char;
+}
+
+/*
+ * Local Variables:
+ * mode:c
+ * c-basic-offset:2
+ * End:
+ */