about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 38
-rw-r--r-- inet/ether_aton.c 1
-rw-r--r-- linuxthreads/manager.c 2
-rw-r--r-- locale/Makefile 8
-rw-r--r-- locale/programs/charmap.c 50
-rw-r--r-- locale/programs/charset.c 59
-rw-r--r-- locale/programs/charset.h 17
-rw-r--r-- locale/programs/ld-collate.c 16
-rw-r--r-- locale/programs/ld-ctype.c 38
-rw-r--r-- locale/programs/linereader.c 2
-rw-r--r-- locale/programs/localedef.c 17
-rw-r--r-- locale/programs/locfile-kw.gperf 3
-rw-r--r-- locale/programs/locfile-kw.h 160
-rw-r--r-- locale/programs/locfile-token.h 5
-rw-r--r-- locale/programs/locfile.h 3
-rw-r--r-- locale/programs/repertoire.c 323
-rw-r--r-- locale/programs/repertoire.h 38
-rw-r--r-- locale/programs/stringtrans.c 5
-rw-r--r-- localedata/ChangeLog 7
-rw-r--r-- localedata/Makefile 10
-rw-r--r-- localedata/repertoiremaps/charids.894 511
-rw-r--r-- localedata/repertoiremaps/mnemonic.ds (renamed from localedata/mnemonic.ds) 88
-rw-r--r-- sysdeps/arm/memset.S 1
-rw-r--r-- sysdeps/unix/arm/start.c 85
-rw-r--r-- sysdeps/unix/sysv/linux/arm/mmap.S 39
-rw-r--r-- sysdeps/unix/sysv/linux/arm/sysdep.h 2
26 files changed, 1291 insertions, 237 deletions
diff --git a/ChangeLog b/ChangeLog
index 39e4021..e6af1a8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,41 @@
+1998-04-30 16:45 Ulrich Drepper <drepper@cygnus.com>
+
+ * inet/ether_aton.c: Including netinet/if_ether.h is not necessary.
+
+ * locale/Makefile (distribute): Add programs/repertoire.h.
+ (localedef-modules): Add repertoire.
+ (CPPFLAGS): Define REPERTOIREMAP_PATH.
+ * locale/programs/repertoire.c: New file.
+ * locale/programs/repertoire.h: New file.
+ * locale/programs/charmap.c: Start fixing character set handling
+ to handle multi-byte encodings.
+ * locale/programs/charset.c: Likewise.
+ * locale/programs/charset.h: Likewise.
+ * locale/programs/ld-collate.c: Likewise.
+ * locale/programs/ld-ctype.c: Likewise.
+ * locale/programs/linereader.c: Likewise.
+ * locale/programs/localedef.c: Likewise.
+ * locale/programs/locfile-kw.gperf: Likewise.
+ * locale/programs/locfile-kw.h: Likewise.
+ * locale/programs/locfile-token.h: Likewise.
+ * locale/programs/locfile.h: Likewise.
+ * locale/programs/stringtrans.c: Likewise.
+
+1998-04-18 Philip Blundell <Philip.Blundell@pobox.com>
+
+ * sysdeps/arm/memset.S: Fix off by one error.
+
+ * sysdeps/unix/sysv/linux/arm/sysdep.h (PSEUDO): On error, call
+ __syscall_error rather than syscall_error directly.
+
+1998-04-17 Philip Blundell <Philip.Blundell@pobox.com>
+
+ * sysdeps/unix/sysv/linux/arm/mmap.S: New file; implementation of
+ mmap() syscall for ARM.
+
+ * sysdeps/unix/arm/start.c: New file; startup code for ARM a.out
+ binaries.
+
1998-04-30 Ulrich Drepper <drepper@cygnus.com>
* locale/programs/localedef.c: Recognize repertoire-map option.
diff --git a/inet/ether_aton.c b/inet/ether_aton.c
index 5748007..4c80ec6 100644
--- a/inet/ether_aton.c
+++ b/inet/ether_aton.c
@@ -18,7 +18,6 @@
Boston, MA 02111-1307, USA. */
#include <netinet/ether.h>
-#include <netinet/if_ether.h>
struct ether_addr *
diff --git a/linuxthreads/manager.c b/linuxthreads/manager.c
index 993df00..8167439 100644
--- a/linuxthreads/manager.c
+++ b/linuxthreads/manager.c
@@ -313,6 +313,7 @@ static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
static void pthread_free(pthread_descr th)
{
+ pthread_handle handle;
pthread_descr t;
/* Check that the thread th is still there -- pthread_reap_children
@@ -324,7 +325,6 @@ static void pthread_free(pthread_descr th)
} while (t != __pthread_main_thread);
if (t != th) return;
- pthread_handle handle;
ASSERT(th->p_exited);
/* Make the handle invalid */
handle = thread_handle(th->p_tid);
diff --git a/locale/Makefile b/locale/Makefile
index 57a5bc5..b932983 100644
--- a/locale/Makefile
+++ b/locale/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1991, 1992, 1995, 1996, 1997 Free Software Foundation, Inc.
+# Copyright (C) 1991, 92, 95, 96, 97, 98 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@@ -30,7 +30,8 @@ distribute = localeinfo.h categories.def iso-4217.def weight.h \
$(lib-modules:=.c) config.h simple-hash.h \
charmap-kw.gperf charmap-kw.h locfile-token.h \
locfile-kw.gperf locfile-kw.h linereader.h \
- locales.h locfile.h stringtrans.h charset.h)
+ locales.h locfile.h stringtrans.h charset.h \
+ repertoire.h)
routines = setlocale findlocale loadlocale localeconv nl_langinfo \
mb_cur_max codeset_name \
newlocale duplocale freelocale
@@ -53,7 +54,7 @@ vpath %.h programs
vpath %.gperf programs
localedef-modules := $(categories:%=ld-%) charmap charset linereader \
- locfile stringtrans
+ locfile stringtrans repertoire
locale-modules := locale-spec
lib-modules := simple-hash xmalloc xstrdup
@@ -77,6 +78,7 @@ CPPFLAGS := -DLOCALE_PATH='$(localepath)' \
-DLOCALEDIR='"$(localedir)"' \
-DLOCALE_ALIAS_PATH='"$(localedir):$(i18ndir)"' \
-DCHARMAP_PATH='"$(i18ndir)/charmaps"' \
+ -DREPERTOIREMAP_PATH='"$(i18ndir)/repertoiremaps"' \
-DLOCSRCDIR='"$(i18ndir)/locales"' -DHAVE_CONFIG_H \
-Iprograms $(CPPFLAGS)
diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c
index 0cd62fb..7114a23 100644
--- a/locale/programs/charmap.c
+++ b/locale/programs/charmap.c
@@ -33,6 +33,8 @@
#include "error.h"
#include "linereader.h"
#include "charset.h"
+#include "locfile.h"
+#include "repertoire.h"
/* Uncomment following line for production version. */
@@ -209,6 +211,8 @@ parse_charmap (const char *filename)
memset (result, '\0', sizeof (struct charset_t));
/* The default DEFAULT_WIDTH is 1. */
result->width_default = 1;
+ /* Let the user overwrite the repertoire map we use. */
+ result->repertoiremap = repertoiremap;
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
@@ -265,6 +269,17 @@ parse_charmap (const char *filename)
lr_ignore_rest (cmfile, 1);
+ /* Read the repertoire map now. */
+ if (result->repertoiremap == NULL)
+ /* This is fatal. */
+ error (4, 0, _("no repertoire map specified: cannot proceed"));
+
+ result->repertoire = repertoire_read (result->repertoiremap);
+ if (result->repertoire == NULL)
+ /* This is also fatal. */
+ error (4, errno, _("cannot read repertoire map `%s'"),
+ result->repertoiremap);
+
state = 2;
continue;
}
@@ -273,7 +288,7 @@ parse_charmap (const char *filename)
&& nowtok != tok_mb_cur_min && nowtok != tok_escape_char
&& nowtok != tok_comment_char && nowtok != tok_g0esc
&& nowtok != tok_g1esc && nowtok != tok_g2esc
- && nowtok != tok_g3esc)
+ && nowtok != tok_g3esc && nowtok != tok_repertoiremap)
{
lr_error (cmfile, _("syntax error in prolog: %s"),
_("illegal definition"));
@@ -305,6 +320,18 @@ parse_charmap (const char *filename)
lr_ignore_rest (cmfile, 1);
continue;
+ case tok_repertoiremap:
+ if (arg->tok != tok_ident)
+ goto badarg;
+
+ if (result->repertoiremap == NULL)
+ result->repertoiremap = obstack_copy0 (&result->mem_pool,
+ arg->val.str.start,
+ arg->val.str.len);
+
+ lr_ignore_rest (cmfile, 1);
+ continue;
+
case tok_mb_cur_max:
case tok_mb_cur_min:
if (arg->tok != tok_number)
@@ -437,14 +464,14 @@ argument to <%s> must be a single character"),
continue;
}
- if (nowtok == tok_charcode)
- /* Write char value in table. */
- charset_new_char (cmfile, result, now->val.charcode.nbytes,
- now->val.charcode.val, from_name, to_name);
+ if (now->val.charcode.nbytes < result->mb_cur_min)
+ lr_error (cmfile, _("too few bytes in character encoding"));
+ else if (now->val.charcode.nbytes > result->mb_cur_max)
+ lr_error (cmfile, _("too many bytes in character encoding"));
else
- /* Determine ISO 10646 value and write into table. */
- charset_new_unicode (cmfile, result, now->val.charcode.nbytes,
- now->val.charcode.val, from_name, to_name);
+ charset_new_char (cmfile, &result->char_table,
+ now->val.charcode.nbytes,
+ now->val.charcode.val, from_name, to_name);
/* Ignore trailing comment silently. */
lr_ignore_rest (cmfile, 0);
@@ -466,8 +493,7 @@ argument to <%s> must be a single character"),
continue;
}
- /* If the previous line was not completely correct free the
- used memory. */
+ /* Copy the to-name in a safe place. */
to_name = (char *) obstack_copy0 (&result->mem_pool,
cmfile->token.val.str.start,
cmfile->token.val.str.len);
@@ -694,7 +720,7 @@ new_width (struct linereader *cmfile, struct charset_t *result,
{
unsigned int from_val, to_val;
- from_val = charset_find_value (result, from, strlen (from));
+ from_val = charset_find_value (&result->char_table, from, strlen (from));
if ((wchar_t) from_val == ILLEGAL_CHAR_VALUE)
{
lr_error (cmfile, _("unknown character `%s'"), from);
@@ -705,7 +731,7 @@ new_width (struct linereader *cmfile, struct charset_t *result,
to_val = from_val;
else
{
- to_val = charset_find_value (result, to, strlen (to));
+ to_val = charset_find_value (&result->char_table, to, strlen (to));
if ((wchar_t) to_val == ILLEGAL_CHAR_VALUE)
{
lr_error (cmfile, _("unknown character `%s'"), to);
diff --git a/locale/programs/charset.c b/locale/programs/charset.c
index fdacf25..767fafb 100644
--- a/locale/programs/charset.c
+++ b/locale/programs/charset.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -23,6 +23,8 @@
#include <alloca.h>
#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -31,49 +33,24 @@
#include "charset.h"
-static void
-insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
- unsigned int value, const char *from, const char *to);
-
-
-void
-charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes,
- unsigned int value, const char *from, const char *to)
-{
- if (bytes < cs->mb_cur_min)
- lr_error (lr, _("too few bytes in character encoding"));
- else if (bytes > cs->mb_cur_max)
- lr_error (lr, _("too many bytes in character encoding"));
- else
- insert_char (lr, cs, bytes, value, from, to);
-}
-
-
-void
-charset_new_unicode (struct linereader *lr, struct charset_t *cs, int bytes,
- unsigned int value, const char *from, const char *to)
-{
- /* For now: perhaps <Uxxxx> support will be removed again... */
- insert_char (lr, cs, bytes, value, from, to);
-}
-
-
unsigned int
-charset_find_value (const struct charset_t *cs, const char *name, size_t len)
+charset_find_value (const hash_table *ht, const char *name, size_t len)
{
void *result;
- if (find_entry ((hash_table *) &cs->char_table, name, len, &result) < 0)
+ if (find_entry ((hash_table *) ht, name, len, &result) < 0)
return ILLEGAL_CHAR_VALUE;
return (unsigned int) ((unsigned long int) result);
}
-static void
-insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
- unsigned int value, const char *from, const char *to)
+void
+charset_new_char (struct linereader *lr, hash_table *ht, int bytes,
+ unsigned int value, const char *from, const char *to)
{
+ char *from_end;
+ char *to_end;
const char *cp;
char *buf;
int prefix_len, len1, len2;
@@ -81,7 +58,7 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
if (to == NULL)
{
- if (insert_entry (&cs->char_table, from, strlen (from),
+ if (insert_entry (ht, from, strlen (from),
(void *) (unsigned long int) value)
< 0)
lr_error (lr, _("duplicate character name `%s'"), from);
@@ -111,8 +88,16 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
goto illegal_range;
- from_nr = strtoul (&from[prefix_len], NULL, 10);
- to_nr = strtoul (&to[prefix_len], NULL, 10);
+ errno = 0;
+ from_nr = strtoul (&from[prefix_len], &from_end, 10);
+ if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE)
+ || ((to_nr = strtoul (&to[prefix_len], &to_end, 10)) == ULONG_MAX
+ && errno == ERANGE)
+ || *to_end != '\0')
+ {
+ lr_error (lr, _("<%s> and <%s> are illegal names for range"), from, to);
+ return;
+ }
if (from_nr > to_nr)
{
@@ -127,7 +112,7 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
{
sprintf (&buf[prefix_len], "%0*d", len1 - prefix_len, cnt);
- if (insert_entry (&cs->char_table, buf, len1,
+ if (insert_entry (ht, buf, len1,
(void *) (unsigned long int) (value + (cnt - from_nr)))
< 0)
lr_error (lr, _("duplicate character name `%s'"), buf);
diff --git a/locale/programs/charset.h b/locale/programs/charset.h
index 82c4ef0..db93f16 100644
--- a/locale/programs/charset.h
+++ b/locale/programs/charset.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -22,6 +22,7 @@
#include <obstack.h>
+#include "repertoire.h"
#include "simple-hash.h"
#include "linereader.h"
@@ -36,6 +37,9 @@ struct width_rule
struct charset_t
{
+ const char *repertoiremap;
+ struct repertoire_t *repertoire;
+
const char *code_set_name;
int mb_cur_min;
int mb_cur_max;
@@ -63,14 +67,11 @@ extern int be_quiet;
struct charset_t *charmap_read (const char *filename);
/* Prototypes for function to insert new character. */
-void charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes,
+void charset_new_char (struct linereader *lr, hash_table *ht, int bytes,
unsigned int value, const char *from, const char *to);
-void charset_new_unicode (struct linereader *lr, struct charset_t *cs,
- int bytes, unsigned int value, const char *from,
- const char *to);
-
-unsigned int charset_find_value (const struct charset_t *__cs,
- const char *__name, size_t __len);
+/* Return the value stored under the given key in the hashing table. */
+unsigned int charset_find_value (const hash_table *ht,
+ const char *name, size_t len);
#endif /* charset.h */
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 57b9776..a92ff11 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -212,7 +212,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset)
void *ptmp;
unsigned int value = 0;
- wch = charset_find_value (charset, patch->token, toklen);
+ wch = charset_find_value (&charset->char_table, patch->token, toklen);
if (wch != ILLEGAL_CHAR_VALUE)
{
element_t *runp;
@@ -1054,7 +1054,8 @@ collate_element_to (struct linereader *lr, struct localedef_t *locale,
collate->combine_token = NULL;
}
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if ((wchar_t) value != ILLEGAL_CHAR_VALUE)
{
lr_error (lr, _("symbol for multicharacter collating element "
@@ -1181,7 +1182,8 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale,
wchar_t value;
void *not_used;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if (value != ILLEGAL_CHAR_VALUE)
{
lr_error (lr, _("symbol for multicharacter collating element "
@@ -1268,7 +1270,7 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale,
{
case tok_bsymbol:
/* We have a string to find in one of the three hashing tables. */
- value = charset_find_value (charset, code->val.str.start,
+ value = charset_find_value (&charset->char_table, code->val.str.start,
code->val.str.len);
if (value != ILLEGAL_CHAR_VALUE)
{
@@ -1533,7 +1535,8 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
assert (code->tok == tok_bsymbol);
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if (value != ILLEGAL_CHAR_VALUE)
{
element_t *runp;
@@ -1706,7 +1709,8 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
return -1;
}
- wch = charset_find_value (charset, startp, putp - startp);
+ wch = charset_find_value (&charset->char_table, startp,
+ putp - startp);
if (wch != ILLEGAL_CHAR_VALUE)
{
element_t *pelem;
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index f2f32cc..2f9a9a2 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -308,7 +308,7 @@ character %s'%s' in class `%s' must not be in class `%s'"),
}
/* ... and now test <SP> as a special case. */
- space_value = charset_find_value (charset, "SP", 2);
+ space_value = charset_find_value (&charset->char_table, "SP", 2);
if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -634,7 +634,8 @@ ctype_class_from (struct linereader *lr, struct localedef_t *locale,
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
unsigned int value;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
ctype->last_class_char = value;
@@ -656,7 +657,8 @@ ctype_class_to (struct linereader *lr, struct localedef_t *locale,
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
unsigned int value, cnt;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
/* In the LC_CTYPE category it is no error when a character is
not found. This has to be ignored silently. */
@@ -750,7 +752,8 @@ ctype_map_from (struct linereader *lr, struct localedef_t *locale,
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
unsigned int value;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
/* In the LC_CTYPE category it is no error when a character is
@@ -770,7 +773,8 @@ ctype_map_to (struct linereader *lr, struct localedef_t *locale,
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
unsigned int value;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
|| (wchar_t) value == ILLEGAL_CHAR_VALUE)
@@ -948,7 +952,7 @@ set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
unsigned int value;
tmp[0] = ch;
- value = charset_find_value (charset, tmp, 1);
+ value = charset_find_value (&charset->char_table, tmp, 1);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1013,7 +1017,7 @@ character `%s' not defined while needed as default value"),
{
unsigned int value;
- value = charset_find_value (charset, "space", 5);
+ value = charset_find_value (&charset->char_table, "space", 5);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1024,7 +1028,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "form-feed", 9);
+ value = charset_find_value (&charset->char_table, "form-feed", 9);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1035,7 +1039,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "newline", 7);
+ value = charset_find_value (&charset->char_table, "newline", 7);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1046,7 +1050,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "carriage-return", 15);
+ value = charset_find_value (&charset->char_table, "carriage-return", 15);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1057,7 +1061,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "tab", 3);
+ value = charset_find_value (&charset->char_table, "tab", 3);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1068,7 +1072,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "vertical-tab", 12);
+ value = charset_find_value (&charset->char_table, "vertical-tab", 12);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1097,7 +1101,7 @@ character `%s' not defined while needed as default value"),
{
unsigned int value;
- value = charset_find_value (charset, "space", 5);
+ value = charset_find_value (&charset->char_table, "space", 5);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1108,7 +1112,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
- value = charset_find_value (charset, "tab", 3);
+ value = charset_find_value (&charset->char_table, "tab", 3);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1149,7 +1153,7 @@ character `%s' not defined while needed as default value"),
if ((ctype->class_collection[cnt] & mask) != 0)
ctype->class_collection[cnt] |= BIT (tok_print);
- space = charset_find_value (charset, "space", 5);
+ space = charset_find_value (&charset->char_table, "space", 5);
if (space == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1178,7 +1182,7 @@ character `%s' not defined while needed as default value"),
tmp[1] = (char) ch;
- value_from = charset_find_value (charset, &tmp[1], 1);
+ value_from = charset_find_value (&charset->char_table, &tmp[1], 1);
if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1190,7 +1194,7 @@ character `%s' not defined while needed as default value"),
/* This conversion is implementation defined. */
tmp[1] = (char) (ch + ('A' - 'a'));
- value_to = charset_find_value (charset, &tmp[1], 1);
+ value_to = charset_find_value (&charset->char_table, &tmp[1], 1);
if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c
index 6692164..4406e1a 100644
--- a/locale/programs/linereader.c
+++ b/locale/programs/linereader.c
@@ -524,7 +524,7 @@ get_string (struct linereader *lr, const struct charset_t *charset)
if (lr->translate_strings)
{
- value = charset_find_value (charset, &buf[startidx],
+ value = charset_find_value (&charset->char_table, &buf[startidx],
bufact - startidx);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
illegal_string = 1;
diff --git a/locale/programs/localedef.c b/locale/programs/localedef.c
index 32437c1..831c36e 100644
--- a/locale/programs/localedef.c
+++ b/locale/programs/localedef.c
@@ -81,8 +81,8 @@ static const char *charmap_file;
/* Name of the locale definition file. */
static const char *input_file;
-/* Name of the UCS file. */
-static const char *ucs_csn;
+/* Name of the repertoire map file. */
+const char *repertoiremap;
/* Name and version of program. */
@@ -99,9 +99,7 @@ static const struct argp_option options[] =
{ "charmap", 'f', "FILE", 0,
N_("Symbolic character names defined in FILE") },
{ "inputfile", 'i', "FILE", 0, N_("Source definitions are found in FILE") },
- { "code-set-name", 'u', "NAME", OPTION_HIDDEN,
- N_("Specify code set for mapping ISO 10646 elements") },
- { "repertoire-map", 'u', NAME, 0,
+ { "repertoire-map", 'u', "FILE", 0,
N_("file containing mapping from symbolic names to UCS4 values") },
{ NULL, 0, NULL, 0, N_("Output control:") },
@@ -355,7 +353,7 @@ parse_opt (int key, char *arg, struct argp_state *state)
input_file = arg;
break;
case 'u':
- ucs_csn = arg;
+ repertoiremap = arg;
break;
case 'v':
verbose = 1;
@@ -377,10 +375,11 @@ more_help (int key, const char *text, void *input)
case ARGP_KEY_HELP_EXTRA:
/* We print some extra information. */
asprintf (&cp, gettext ("\
-System's directory for character maps: %s\n\
- locale files : %s\n\
+System's directory for character maps : %s\n\
+ repertoire maps: %s\n\
+ locale path : %s\n\
%s"),
- CHARMAP_PATH, LOCALE_PATH, gettext ("\
+ CHARMAP_PATH, REPERTOIREMAP_PATH, LOCALE_PATH, gettext ("\
Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));
return cp;
default:
diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf
index 38150ad..991e9dd 100644
--- a/locale/programs/locfile-kw.gperf
+++ b/locale/programs/locfile-kw.gperf
@@ -1,5 +1,5 @@
%{
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -26,6 +26,7 @@ struct keyword_t ;
%%
escape_char, tok_escape_char, 0
comment_char, tok_comment_char, 0
+repertoiremap, tok_repertoiremap, 0
LC_CTYPE, tok_lc_ctype, 0
END, tok_end, 0
copy, tok_copy, 0
diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h
index 0fb0b5c..bd80618 100644
--- a/locale/programs/locfile-kw.h
+++ b/locale/programs/locfile-kw.h
@@ -1,6 +1,6 @@
/* C code produced by gperf version 2.5 (GNU C++ version) */
/* Command-line: gperf -acCgopt -k1,2,5,$ -N locfile_hash programs/locfile-kw.gperf */
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -24,12 +24,12 @@
#include "locfile-token.h"
struct keyword_t ;
-#define TOTAL_KEYWORDS 73
+#define TOTAL_KEYWORDS 74
#define MIN_WORD_LENGTH 3
#define MAX_WORD_LENGTH 17
#define MIN_HASH_VALUE 3
-#define MAX_HASH_VALUE 132
-/* maximum key range = 130, duplicates = 0 */
+#define MAX_HASH_VALUE 178
+/* maximum key range = 176, duplicates = 0 */
#ifdef __GNUC__
inline
@@ -39,19 +39,19 @@ hash (register const char *str, register int len)
{
static const unsigned char asso_values[] =
{
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 0, 0, 0,
- 0, 0, 133, 0, 133, 133, 0, 133, 0, 20,
- 133, 133, 0, 0, 0, 5, 133, 133, 133, 5,
- 133, 133, 133, 133, 133, 5, 133, 0, 60, 0,
- 15, 10, 20, 40, 5, 20, 133, 0, 45, 40,
- 10, 0, 0, 133, 15, 50, 0, 30, 0, 10,
- 15, 15, 133, 133, 133, 133, 133, 133,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 0, 0, 0,
+ 0, 0, 179, 0, 179, 179, 0, 179, 0, 45,
+ 179, 179, 0, 0, 0, 5, 179, 179, 179, 10,
+ 179, 179, 179, 179, 179, 5, 179, 0, 5, 0,
+ 15, 20, 5, 20, 40, 20, 179, 25, 15, 50,
+ 10, 0, 0, 179, 45, 50, 0, 30, 0, 5,
+ 10, 60, 179, 179, 179, 179, 179, 179,
};
register int hval = len;
@@ -88,102 +88,110 @@ locfile_hash (register const char *str, register int len)
{"",},
{"t_fmt", tok_t_fmt, 0},
{"LC_MESSAGES", tok_lc_messages, 0},
- {"",},
- {"charconv", tok_charconv, 0},
+ {"",}, {"",},
{"UNDEFINED", tok_undefined, 0},
{"LC_NUMERIC", tok_lc_numeric, 0},
{"",},
{"collating-element", tok_collating_element, 0},
{"position", tok_position, 0},
- {"copy", tok_copy, 0},
- {"print", tok_print, 0},
{"",},
- {"toupper", tok_toupper, 0},
+ {"alpha", tok_alpha, 0},
+ {"",}, {"",},
{"positive_sign", tok_positive_sign, 0},
{"",},
{"d_fmt", tok_d_fmt, 0},
+ {"",},
+ {"forward", tok_forward, 0},
{"",}, {"",},
- {"era", tok_era, 0},
- {"p_sep_by_space", tok_p_sep_by_space, 0},
- {"LC_COLLATE", tok_lc_collate, 0},
- {"noexpr", tok_noexpr, 0},
- {"tolower", tok_tolower, 0},
- {"day", tok_day, 0},
- {"era_t_fmt", tok_era_t_fmt, 0},
+ {"abmon", tok_abmon, 0},
+ {"collating-symbol", tok_collating_symbol, 0},
+ {"d_t_fmt", tok_d_t_fmt, 0},
+ {"backward", tok_backward, 0},
+ {"",},
{"punct", tok_punct, 0},
- {"LC_MONETARY", tok_lc_monetary, 0},
- {"comment_char", tok_comment_char, 0},
+ {"",}, {"",}, {"",},
+ {"p_sep_by_space", tok_p_sep_by_space, 0},
+ {"digit", tok_digit, 0},
+ {"",}, {"",}, {"",}, {"",},
+ {"cntrl", tok_cntrl, 0},
+ {"p_sign_posn", tok_p_sign_posn, 0},
{"",},
+ {"charconv", tok_charconv, 0},
{"n_sep_by_space", tok_n_sep_by_space, 0},
- {"digit", tok_digit, 0},
- {"order_start", tok_order_start, 0},
- {"forward", tok_forward, 0},
+ {"print", tok_print, 0},
+ {"xdigit", tok_xdigit, 0},
+ {"toupper", tok_toupper, 0},
{"negative_sign", tok_negative_sign, 0},
{"",},
- {"nostr", tok_nostr, 0},
- {"yesstr", tok_yesstr, 0},
- {"d_t_fmt", tok_d_t_fmt, 0},
- {"",},
- {"era_d_fmt", tok_era_d_fmt, 0},
- {"alpha", tok_alpha, 0},
- {"era_d_t_fmt", tok_era_d_t_fmt, 0},
+ {"LC_COLLATE", tok_lc_collate, 0},
+ {"n_sign_posn", tok_n_sign_posn, 0},
+ {"tolower", tok_tolower, 0},
+ {"",}, {"",},
+ {"int_curr_symbol", tok_int_curr_symbol, 0},
+ {"noexpr", tok_noexpr, 0},
{"",},
{"mon", tok_mon, 0},
- {"order_end", tok_order_end, 0},
+ {"copy", tok_copy, 0},
{"t_fmt_ampm", tok_t_fmt_ampm, 0},
- {"xdigit", tok_xdigit, 0},
+ {"LC_MONETARY", tok_lc_monetary, 0},
{"mon_thousands_sep", tok_mon_thousands_sep, 0},
- {"",}, {"",}, {"",},
- {"collating-symbol", tok_collating_symbol, 0},
- {"yesexpr", tok_yesexpr, 0},
- {"era_year", tok_era_year, 0},
- {"charclass", tok_charclass, 0},
- {"upper", tok_upper, 0},
- {"p_sign_posn", tok_p_sign_posn, 0},
+ {"era", tok_era, 0},
+ {"",}, {"",}, {"",}, {"",},
+ {"p_cs_precedes", tok_p_cs_precedes, 0},
+ {"era_t_fmt", tok_era_t_fmt, 0},
+ {"blank", tok_blank, 0},
{"",},
- {"thousands_sep", tok_thousands_sep, 0},
+ {"comment_char", tok_comment_char, 0},
+ {"day", tok_day, 0},
{"",},
- {"graph", tok_graph, 0},
+ {"currency_symbol", tok_currency_symbol, 0},
{"",},
{"mon_decimal_point", tok_mon_decimal_point, 0},
- {"p_cs_precedes", tok_p_cs_precedes, 0},
+ {"n_cs_precedes", tok_n_cs_precedes, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",},
+ {"era_d_fmt", tok_era_d_fmt, 0},
+ {"alt_digits", tok_alt_digits, 0},
+ {"era_d_t_fmt", tok_era_d_t_fmt, 0},
{"",},
- {"space", tok_space, 0},
- {"n_sign_posn", tok_n_sign_posn, 0},
+ {"grouping", tok_grouping, 0},
{"",},
+ {"space", tok_space, 0},
+ {"",}, {"",},
{"decimal_point", tok_decimal_point, 0},
+ {"charclass", tok_charclass, 0},
+ {"int_frac_digits", tok_int_frac_digits, 0},
+ {"order_start", tok_order_start, 0},
+ {"mon_grouping", tok_mon_grouping, 0},
+ {"thousands_sep", tok_thousands_sep, 0},
{"from", tok_from, 0},
+ {"nostr", tok_nostr, 0},
+ {"",}, {"",}, {"",}, {"",},
{"lower", tok_lower, 0},
- {"",}, {"",},
- {"n_cs_precedes", tok_n_cs_precedes, 0},
+ {"",}, {"",}, {"",},
+ {"order_end", tok_order_end, 0},
{"",},
- {"abmon", tok_abmon, 0},
- {"escape_char", tok_escape_char, 0},
+ {"frac_digits", tok_frac_digits, 0},
{"",}, {"",}, {"",},
- {"int_curr_symbol", tok_int_curr_symbol, 0},
+ {"alnum", tok_alnum, 0},
{"",}, {"",},
- {"backward", tok_backward, 0},
+ {"repertoiremap", tok_repertoiremap, 0},
{"",},
+ {"upper", tok_upper, 0},
+ {"escape_char", tok_escape_char, 0},
+ {"",}, {"",}, {"",},
{"abday", tok_abday, 0},
- {"",}, {"",}, {"",}, {"",},
- {"currency_symbol", tok_currency_symbol, 0},
- {"frac_digits", tok_frac_digits, 0},
- {"",},
- {"grouping", tok_grouping, 0},
+ {"yesstr", tok_yesstr, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
{"",},
- {"cntrl", tok_cntrl, 0},
- {"",}, {"",}, {"",}, {"",},
- {"blank", tok_blank, 0},
- {"",}, {"",}, {"",}, {"",},
- {"int_frac_digits", tok_int_frac_digits, 0},
- {"",}, {"",}, {"",}, {"",},
- {"alt_digits", tok_alt_digits, 0},
+ {"yesexpr", tok_yesexpr, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"graph", tok_graph, 0},
{"",}, {"",}, {"",}, {"",},
{"am_pm", tok_am_pm, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
{"",}, {"",}, {"",}, {"",},
- {"alnum", tok_alnum, 0},
- {"",},
- {"mon_grouping", tok_mon_grouping, 0},
+ {"era_year", tok_era_year, 0},
};
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
index 6d1543c..7845b4b 100644
--- a/locale/programs/locfile-token.h
+++ b/locale/programs/locfile-token.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -49,6 +49,8 @@ enum token_t
tok_g2esc,
tok_g3esc,
+ tok_charids,
+
tok_code_set_name,
tok_mb_cur_max,
tok_mb_cur_min,
@@ -56,6 +58,7 @@ enum token_t
tok_width,
tok_width_variable,
tok_width_default,
+ tok_repertoiremap,
tok_lc_ctype,
tok_copy,
diff --git a/locale/programs/locfile.h b/locale/programs/locfile.h
index 604e726..697af64 100644
--- a/locale/programs/locfile.h
+++ b/locale/programs/locfile.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -55,6 +55,7 @@ struct localedef_t
/* Declared in localedef.c. */
extern int be_quiet;
+extern const char *repertoiremap;
/* Found in localedef.c. */
void def_to_process (const char *name, int category);
diff --git a/locale/programs/repertoire.c b/locale/programs/repertoire.c
new file mode 100644
index 0000000..1f219ec
--- /dev/null
+++ b/locale/programs/repertoire.c
@@ -0,0 +1,323 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <errno.h>
+#include <error.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "linereader.h"
+#include "charset.h"
+#include "repertoire.h"
+#include "simple-hash.h"
+
+
+extern void *xmalloc (size_t __n);
+
+
+/* Simple keyword hashing for the repertoiremap. */
+static struct repertoire_t *parse_repertoiremap (const char *filename);
+static const struct keyword_t *repertoiremap_hash (const char *str, int len);
+
+
+/* Read the repertoire map named FILENAME and return its parsed form, or
+   NULL if no usable file could be located or parsed.
+
+   FILENAME is used as given when it is readable for the effective user.
+   Otherwise a name not starting with '/' is looked up below
+   REPERTOIREMAP_PATH; an unreadable absolute name yields NULL without a
+   diagnostic.  When the selected file cannot be parsed an error is
+   printed, unless be_quiet is set.  */
+struct repertoire_t *
+repertoire_read (const char *filename)
+{
+  const char *pathnfile;
+  struct repertoire_t *result = NULL;
+
+  if (euidaccess (filename, R_OK) >= 0)
+    pathnfile = filename;
+  else if (filename[0] != '/')
+    {
+      /* Search the repertoire map directory (the same directory
+         parse_repertoiremap falls back to), not the charmap directory.
+         sizeof already counts the terminating NUL; the extra byte is
+         for the '/' separator.  */
+      char *cp = xmalloc (strlen (filename) + sizeof REPERTOIREMAP_PATH + 1);
+      stpcpy (stpcpy (stpcpy (cp, REPERTOIREMAP_PATH), "/"), filename);
+
+      /* NOTE(review): CP is never released.  Whether lr_open keeps a
+         reference to the name is not visible from here -- confirm
+         before adding a free.  */
+      pathnfile = (const char *) cp;
+    }
+  else
+    pathnfile = NULL;
+
+  if (pathnfile != NULL)
+    {
+      result = parse_repertoiremap (pathnfile);
+
+      /* The message names what the caller asked for, not the expanded
+         path.  */
+      if (result == NULL && !be_quiet)
+        error (0, errno, _("repertoire map file `%s' not found"), filename);
+    }
+
+  return result;
+}
+
+
+/* Parse the repertoire map in FILENAME and return a newly allocated
+   description of it, or NULL if the file cannot be opened or the hash
+   table cannot be set up.
+
+   If FILENAME cannot be opened as given and contains no slash, it is
+   tried again below REPERTOIREMAP_PATH.  Syntax errors are reported
+   through lr_error and the offending line is skipped; they do not make
+   the function fail.
+
+   The parser is a small state machine:
+     1   prolog: escape_char/comment_char definitions, optional CHARIDS
+     2   start of a body line: symbolic name or END expected
+     3   after the first name: ellipsis or <Uxxxx> value expected
+     4   after an ellipsis: symbolic name ending the range expected
+     5   after the range end: <Uxxxx> value expected
+     90  after END: CHARIDS expected.  */
+static struct repertoire_t *
+parse_repertoiremap (const char *filename)
+{
+  struct linereader *cmfile;
+  struct repertoire_t *result;
+  int state;
+  char *from_name = NULL;
+  char *to_name = NULL;
+
+  /* Determine path.  */
+  cmfile = lr_open (filename, repertoiremap_hash);
+  if (cmfile == NULL)
+    {
+      if (strchr (filename, '/') == NULL)
+        {
+          /* Look in the system's repertoire map directory.  */
+          char *buf = xmalloc (strlen (filename) + 1
+                               + sizeof (REPERTOIREMAP_PATH));
+
+          stpcpy (stpcpy (stpcpy (buf, REPERTOIREMAP_PATH), "/"), filename);
+          cmfile = lr_open (buf, repertoiremap_hash);
+
+          if (cmfile == NULL)
+            free (buf);
+        }
+
+      if (cmfile == NULL)
+        return NULL;
+    }
+
+  /* Allocate room for result.  */
+  result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t));
+  memset (result, '\0', sizeof (struct repertoire_t));
+
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+  obstack_init (&result->mem_pool);
+
+  if (init_hash (&result->char_table, 256))
+    {
+      /* Give back everything acquired so far: the obstack, the result
+         record and the open input file.  The caller reports the failure
+         using errno, so keep the value init_hash left behind.  */
+      int saved_errno = errno;
+
+      obstack_free (&result->mem_pool, NULL);
+      free (result);
+      lr_close (cmfile);
+
+      errno = saved_errno;
+      return NULL;
+    }
+
+  /* We use a state machine to describe the repertoire map file
+     format.  */
+  state = 1;
+  while (1)
+    {
+      /* What's on?  */
+      struct token *now = lr_token (cmfile, NULL);
+      enum token_t nowtok = now->tok;
+      struct token *arg;
+
+      if (nowtok == tok_eof)
+        break;
+
+      switch (state)
+        {
+        case 1:
+          /* We haven't yet read any character definition.  This is where
+             we accept escape_char and comment_char definitions.  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_escape_char || nowtok == tok_comment_char)
+            {
+              /* We know that we need an argument.  */
+              arg = lr_token (cmfile, NULL);
+
+              if (arg->tok != tok_ident)
+                {
+                  lr_error (cmfile, _("syntax error in prolog: %s"),
+                            _("bad argument"));
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (arg->val.str.len != 1)
+                {
+                  lr_error (cmfile, _("\
+argument to <%s> must be a single character"),
+                            nowtok == tok_escape_char ? "escape_char"
+                                                      : "comment_char");
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (nowtok == tok_escape_char)
+                cmfile->escape_char = *arg->val.str.start;
+              else
+                cmfile->comment_char = *arg->val.str.start;
+
+              lr_ignore_rest (cmfile, 1);
+              continue;
+            }
+
+          if (nowtok == tok_charids)
+            {
+              /* The optional CHARIDS header line opens the body.  */
+              lr_ignore_rest (cmfile, 1);
+
+              state = 2;
+              continue;
+            }
+
+          /* Otherwise we start reading the character definitions.  */
+          state = 2;
+          /* FALLTHROUGH */
+
+        case 2:
+          /* We are now in the body.  Each line
+             must have the format "%s %s %s\n" or "%s...%s %s %s\n".  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_end)
+            {
+              state = 90;
+              continue;
+            }
+
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile,
+                        _("syntax error in repertoire map definition: %s"),
+                        _("no symbolic name given"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* If the previous line was not completely correct free the
+             used memory.  FROM_NAME is only reset to NULL once a
+             definition has been stored.  */
+          if (from_name != NULL)
+            obstack_free (&result->mem_pool, from_name);
+
+          from_name = (char *) obstack_copy0 (&result->mem_pool,
+                                              now->val.str.start,
+                                              now->val.str.len);
+          to_name = NULL;
+
+          state = 3;
+          continue;
+
+        case 3:
+          /* We have two possibilities: We can see an ellipsis or an
+             encoding value.  */
+          if (nowtok == tok_ellipsis)
+            {
+              state = 4;
+              continue;
+            }
+          /* FALLTHROUGH */
+
+        case 5:
+          /* We expect a value of the form <Uxxxx> or <Uxxxxxxxx> where
+             the xxx mean a hexadecimal value.  Whatever happens, the
+             next token starts a new line.  */
+          state = 2;
+
+          /* NOTE(review): errno is reset here although nothing in this
+             case reads it afterwards.  */
+          errno = 0;
+          if (nowtok != tok_ucs2 && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile,
+                        _("syntax error in repertoire map definition: %s"),
+                        _("no <Uxxxx> or <Uxxxxxxxx> value given"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* We've found a new valid definition.  */
+          charset_new_char (cmfile, &result->char_table, 4,
+                            now->val.charcode.val, from_name, to_name);
+
+          /* Ignore the rest of the line.  */
+          lr_ignore_rest (cmfile, 0);
+
+          /* The names now belong to the table; forget them so that
+             state 2 does not free them.  */
+          from_name = NULL;
+          to_name = NULL;
+
+          continue;
+
+        case 4:
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile,
+                        _("syntax error in repertoire map definition: %s"),
+                        _("no symbolic name given for end of range"));
+
+              lr_ignore_rest (cmfile, 0);
+              state = 2;
+              continue;
+            }
+
+          /* Copy the to-name in a safe place.  */
+          to_name = (char *) obstack_copy0 (&result->mem_pool,
+                                            cmfile->token.val.str.start,
+                                            cmfile->token.val.str.len);
+
+          state = 5;
+          continue;
+
+        case 90:
+          if (nowtok != tok_charids)
+            lr_error (cmfile, _("\
+`%1$s' definition does not end with `END %1$s'"), "CHARIDS");
+
+          lr_ignore_rest (cmfile, nowtok == tok_charids);
+          break;
+        }
+
+      /* Only state 90 gets here: the map is complete, stop reading.  */
+      break;
+    }
+
+  if (state != 2 && state != 90 && !be_quiet)
+    error (0, 0, _("%s: premature end of file"), cmfile->fname);
+
+  lr_close (cmfile);
+
+  return result;
+}
+
+
+/* Keyword lookup handed to lr_open for repertoire map files.  Return the
+   table entry whose name is exactly the LEN bytes starting at STR, or
+   NULL if STR is not one of the keywords below.  */
+static const struct keyword_t *
+repertoiremap_hash (const char *str, int len)
+{
+  /* The dimension is left to the compiler.  An explicit size of zero
+     would make every access below reach outside the array.  */
+  static const struct keyword_t wordlist[] =
+  {
+    {"escape_char", tok_escape_char, 1},
+    {"comment_char", tok_comment_char, 1},
+    {"CHARIDS", tok_charids, 0},
+    {"END", tok_end, 0},
+  };
+  size_t idx;
+
+  /* Only a handful of keywords exist, so a linear scan is enough.  The
+     length test comes first so memcmp never reads past a shorter
+     name; a negative LEN matches nothing.  */
+  for (idx = 0; idx < sizeof (wordlist) / sizeof (wordlist[0]); ++idx)
+    if (strlen (wordlist[idx].name) == (size_t) len
+        && memcmp (wordlist[idx].name, str, len) == 0)
+      return &wordlist[idx];
+
+  return NULL;
+}
diff --git a/locale/programs/repertoire.h b/locale/programs/repertoire.h
new file mode 100644
index 0000000..7befeb4
--- /dev/null
+++ b/locale/programs/repertoire.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _REPERTOIREMAP_H
+#define _REPERTOIREMAP_H 1
+
+#include <obstack.h>
+
+#include "simple-hash.h"
+#include "linereader.h"
+
+/* In-memory form of a repertoire map, as returned by repertoire_read.  */
+struct repertoire_t
+{
+ /* Obstack holding the symbolic names copied while the map is parsed.  */
+ struct obstack mem_pool;
+ /* Hash table receiving one charset_new_char call per definition read.  */
+ hash_table char_table;
+};
+
+
+/* Prototypes for repertoire map handling functions. */
+struct repertoire_t *repertoire_read (const char *filename);
+
+#endif /* repertoire.h */
diff --git a/locale/programs/stringtrans.c b/locale/programs/stringtrans.c
index 34b107e..b810129 100644
--- a/locale/programs/stringtrans.c
+++ b/locale/programs/stringtrans.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -92,7 +92,8 @@ translate_string (char *str, struct charset_t *charset)
return NULL;
}
- value = charset_find_value (charset, str + 1, tp - (str + 1));
+ value = charset_find_value (&charset->char_table, str + 1,
+ tp - (str + 1));
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
free (buf);
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index 7a74d1f..8ec112f 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,10 @@
+1998-04-30 Ulrich Drepper <drepper@cygnus.com>
+
+ * Makefile: Install repertoire maps.
+ * localedata/mnemonic.ds: Moved into repertoiremaps subdir.
+ * localedata/repertoiremaps/charids.894: New file.
+ * localedata/repertoiremaps/mnemonic.ds: New file.
+
1998-04-29 Ulrich Drepper <drepper@cygnus.com>
* tst-locale.sh: Add comment for when multi-byte charsets in
diff --git a/localedata/Makefile b/localedata/Makefile
index 9be179a..c4370d8 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -30,6 +30,10 @@ charmaps := $(filter-out $(addprefix charmaps/, CVS RCS %~), \
locales := $(filter-out $(addprefix locales/, CVS RCS %~), \
$(wildcard locales/*))
+# List of repertoire maps.  The variable must be called `repertoiremaps':
+# that is the name expanded by `distribute' and `install-others' below.
+repertoiremaps := $(filter-out $(addprefix repertoiremaps/, CVS RCS %~), \
+ $(wildcard repertoiremaps/*))
+
test-srcs := collate-test xfrm-test tst-fmon tst-rpmatch
test-input := de_DE.ISO-8859-1 da_DK.ISO-8859-1 fr_CA,2.13.ISO-8859-1
test-output := $(foreach s, .out .xout, \
@@ -42,7 +46,7 @@ ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
$(addsuffix .def,$(ld-test-names)))
distribute := CHECKSUMS tst-fmon.sh tst-fmon.data ChangeLog sort-test.sh \
- README mnemonic.ds fr_CA,2.13.in de_DE.in da_DK.in \
+ README fr_CA,2.13.in de_DE.in da_DK.in $(repertoiremaps) \
$(charmaps) $(locales) $(ld-test-srcs) tst-rpmatch.sh \
SUPPORTED tst-locale.sh
@@ -50,13 +54,15 @@ distribute := CHECKSUMS tst-fmon.sh tst-fmon.data ChangeLog sort-test.sh \
include ../Makeconfig
# Files to install.
-install-others := $(addprefix $(inst_i18ndir)/, $(charmaps) $(locales))
+install-others := $(addprefix $(inst_i18ndir)/, $(charmaps) $(locales) \
+ $(repertoiremaps))
include ../Rules
# Install the source files in the appropriate directories.
$(inst_i18ndir)/charmaps/%: charmaps/% $(+force); $(do-install)
$(inst_i18ndir)/locales/%: locales/% $(+force); $(do-install)
+$(inst_i18ndir)/repertoiremaps/%: repertoiremaps/% $(+force); $(do-install)
ifeq (no,$(cross-compiling))
diff --git a/localedata/repertoiremaps/charids.894 b/localedata/repertoiremaps/charids.894
new file mode 100644
index 0000000..868a478
--- /dev/null
+++ b/localedata/repertoiremaps/charids.894
@@ -0,0 +1,511 @@
+<escape_char> /
+# Charids referring to ISO 10646/Correspondance des caract<e!>res avec ISO 10646
+# Version: 1994-08-25
+#<comment_char> #
+CHARIDS
+<NUL> <U0000> # NULL / NUL
+<SOH> <U0001> # START OF HEADING / D<E'>BUT D'ENT<E/>>TE
+<STX> <U0002> # START OF TEXT / D<E'>BUT DE TEXTE
+<ETX> <U0003> # END OF TEXT / FIN DE TEXTE
+<EOT> <U0004> # END OF TRANSMISSION / FIN DE TRANSMISSION
+<ENQ> <U0005> # ENQUIRY / DEMANDE
+<ACK> <U0006> # ACKNOWLEDGE / ACCUS<E'> DE R<E'>CEPTION POSITIF
+<BEL> <U0007> # BELL / SONNERIE
+<BS> <U0008> # BACKSPACE / ESPACE ARRI<E!>RE
+<HT> <U0009> # CHARACTER TABULATION / TABULATION HORIZONTALE
+<LF> <U000A> # LINE FEED / INTERLIGNE
+<VT> <U000B> # LINE TABULATION / TABULATION VERTICALE
+<FF> <U000C> # FORM FEED / PAGE SUIVANTE
+<CR> <U000D> # CARRIAGE RETURN / RETOUR DE CHARIOT
+<SO> <U000E> # SHIFT OUT / HORS CODE
+<SI> <U000F> # SHIFT IN / EN CODE
+<DLE> <U0010> # DATA LINK ESCAPE / <E'>CHAPPEMENT TRANSMISSION
+<DC1> <U0011> # DEVICE CONTROL ONE / COMMANDE D'APPAREIL UN
+<DC2> <U0012> # DEVICE CONTROL TWO / COMMANDE D'APPAREIL DEUX
+<DC3> <U0013> # DEVICE CONTROL THREE / COMMANDE D'APPAREIL TROIS
+<DC4> <U0014> # DEVICE CONTROL FOUR / COMMANDE D'APPAREIL QUATRE
+<NAK> <U0015> # NEGATIVE ACKNOWLEDGE / ACCUS<E'> DE R<E'>CEPTION N<E'>GATIF
+<SYN> <U0016> # SYNCHRONOUS IDLE / SYNCHRONISATION
+<ETB> <U0017> # END OF TRANSMISSION BLOCK / FIN DE TRANSMISSION DE BLOC
+<CAN> <U0018> # CANCEL / ANNULATION
+<EM> <U0019> # END OF MEDIUM / FIN DE SUPPORT
+<SUB> <U001A> # SUBSTITUTE / CARACT<E!>RE DE SUBSTITUTION
+<ESC> <U001B> # ESCAPE / <E'>CHAPPEMENT
+<IS4> <U001C> # FILE SEPARATOR / S<E'>PARATEUR DE FICHIER
+<FS> <U001C> # INFORMATION SEPARATOR FOUR / S<E'>PARATEUR DE FICHIER
+<IFS> <U001C> # INFORMATION SEPARATOR FOUR / S<E'>PARATEUR DE FICHIER
+<IS3> <U001D> # GROUP SEPARATOR / S<E'>PARATEUR DE GROUPE
+<GS> <U001D> # INFORMATION SEPARATOR THREE / S<E'>PARATEUR DE GROUPE
+<IGS> <U001D> # INFORMATION SEPARATOR THREE / S<E'>PARATEUR DE GROUPE
+<IS2> <U001E> # RECORD SEPARATOR / S<E'>PARATEUR D'ARTICLE
+<RS> <U001E> # INFORMATION SEPARATOR TWO / S<E'>PARATEUR D'ARTICLE
+<IRS> <U001E> # INFORMATION SEPARATOR TWO / S<E'>PARATEUR D'ARTICLE
+<IS1> <U001F> # UNIT SEPARATOR / S<E'>PARATEUR DE SOUS-ARTICLE
+<US> <U001F> # INFORMATION SEPARATOR ONE / S<E'>PARATEUR DE SOUS-ARTICLE
+<SP> <U0020> # SPACE / ESPACE
+<!> <U0021> # EXCLAMATION MARK / POINT D'EXCLAMATION
+<"> <U0022> # QUOTATION MARK / GUILLEMET
+<H-> <U0023> # NUMBER SIGN / CROISILLON
+<!S> <U0024> # DOLLAR SIGN / SYMBOLE DOLLAR
+<%> <U0025> # PERCENT SIGN / SYMBOLE POURCENT
+<&> <U0026> # AMPERSAND / PERLU<E!>TE
+<'> <U0027> # APOSTROPHE / APOSTROPHE
+<(> <U0028> # LEFT PARENTHESIS / PARENTH<E!>SE GAUCHE
+<)> <U0029> # RIGHT PARENTHESIS / PARENTH<E!>SE DROITE
+<*> <U002A> # ASTERISK / AST<E'>RISQUE
+<+> <U002B> # PLUS SIGN / SIGNE PLUS
+<,> <U002C> # COMMA / VIRGULE
+<-> <U002D> # HYPHEN-MINUS / TRAIT D'UNION-SIGNE MOINS
+<.> <U002E> # FULL STOP / POINT
+<//> <U002F> # SOLIDUS / BARRE OBLIQUE
+<0> <U0030> # DIGIT ZERO / CHIFFRE Z<E'>RO
+<1> <U0031> # DIGIT ONE / CHIFFRE UN
+<2> <U0032> # DIGIT TWO / CHIFFRE DEUX
+<3> <U0033> # DIGIT THREE / CHIFFRE TROIS
+<4> <U0034> # DIGIT FOUR / CHIFFRE QUATRE
+<5> <U0035> # DIGIT FIVE / CHIFFRE CINQ
+<6> <U0036> # DIGIT SIX / CHIFFRE SIX
+<7> <U0037> # DIGIT SEVEN / CHIFFRE SEPT
+<8> <U0038> # DIGIT EIGHT / CHIFFRE HUIT
+<9> <U0039> # DIGIT NINE / CHIFFRE NEUF
+<:> <U003A> # COLON / DEUX-POINTS
+<;> <U003B> # SEMICOLON / POINT-VIRGULE
+<<> <U003C> # LESS-THAN SIGN / SIGNE INF<E'>RIEUR <A!>
+<=> <U003D> # EQUALS SIGN / SIGNE <E'>GAL
+</>> <U003E> # GREATER-THAN SIGN / SIGNE SUP<E'>RIEUR <A!>
+<?> <U003F> # QUESTION MARK / POINT D'INTERROGATION
+<@> <U0040> # COMMERCIAL AT / A COMMERCIAL
+<Oa> <U0040> # COMMERCIAL AT / A COMMERCIAL
+#
+# In the following, CAP stands for LATIN CAPITAL LETTER
+# CAPLIG stands for LATIN CAPITAL LIGATURE
+# IVT stands for INVERTED
+# LOW stands for LATIN SMALL LETTER
+# LOWLIG stands for LATIN SMALL LIGATURE
+# OI stands for ORDINAL INDICATOR
+# PDAQ stands for POINTING DOUBLE ANGLE QUOTATION
+# VF stands for VULGAR FRACTION
+#
+# Ci-apr<e!>s, FO se lit FRACTION ORDINAIRE
+# GAD se lit GUILLEMET ANGULAIRE DOUBLE
+# IO se lit INDICATEUR ORDINAL
+# INV se lit INVERS<E'>
+# LIGMAJ se lit LIGATURE MAJUSCULE LATINE
+# LIGMIN se lit LIGATURE MINUSCULE LATINE
+# MAJ se lit LETTRE MAJUSCULE LATINE
+# MIN se lit LETTRE MINUSCULE LATINE
+#
+<A> <U0041> # CAP A / MAJ A
+<B> <U0042> # CAP B / MAJ B
+<C> <U0043> # CAP C / MAJ C
+<D> <U0044> # CAP D / MAJ D
+<E> <U0045> # CAP E / MAJ E
+<F> <U0046> # CAP F / MAJ F
+<G> <U0047> # CAP G / MAJ G
+<H> <U0048> # CAP H / MAJ H
+<I> <U0049> # CAP I / MAJ I
+<J> <U004A> # CAP J / MAJ J
+<K> <U004B> # CAP K / MAJ K
+<L> <U004C> # CAP L / MAJ L
+<M> <U004D> # CAP M / MAJ M
+<N> <U004E> # CAP N / MAJ N
+<O> <U004F> # CAP O / MAJ O
+<P> <U0050> # CAP P / MAJ P
+<Q> <U0051> # CAP Q / MAJ Q
+<R> <U0052> # CAP R / MAJ R
+<S> <U0053> # CAP S / MAJ S
+<T> <U0054> # CAP T / MAJ T
+<U> <U0055> # CAP U / MAJ U
+<V> <U0056> # CAP V / MAJ V
+<W> <U0057> # CAP W / MAJ W
+<X> <U0058> # CAP X / MAJ X
+<Y> <U0059> # CAP Y / MAJ Y
+<Z> <U005A> # CAP Z / MAJ Z
+<!(> <U005B> # LEFT SQUARE BRACKET / CROCHET GAUCHE
+<////> <U005C> # REVERSE SOLIDUS / BARRE OBLIQUE INVERS<E'>E
+<)!> <U005D> # RIGHT SQUARE BRACKET / CROCHET DROIT
+<'/>> <U005E> # CIRCUMFLEX ACCENT / ACCENT CIRCONFLEXE
+<_> <U005F> # LOW LINE / TRAIT BAS
+<'!> <U0060> # GRAVE ACCENT / ACCENT GRAVE
+<a> <U0061> # LOW A / MIN A
+<b> <U0062> # LOW B / MIN B
+<c> <U0063> # LOW C / MIN C
+<d> <U0064> # LOW D / MIN D
+<e> <U0065> # LOW E / MIN E
+<f> <U0066> # LOW F / MIN F
+<g> <U0067> # LOW G / MIN G
+<h> <U0068> # LOW H / MIN H
+<i> <U0069> # LOW I / MIN I
+<j> <U006A> # LOW J / MIN J
+<k> <U006B> # LOW K / MIN K
+<l> <U006C> # LOW L / MIN L
+<m> <U006D> # LOW M / MIN M
+<n> <U006E> # LOW N / MIN N
+<o> <U006F> # LOW O / MIN O
+<p> <U0070> # LOW P / MIN P
+<q> <U0071> # LOW Q / MIN Q
+<r> <U0072> # LOW R / MIN R
+<s> <U0073> # LOW S / MIN S
+<t> <U0074> # LOW T / MIN T
+<u> <U0075> # LOW U / MIN U
+<v> <U0076> # LOW V / MIN V
+<w> <U0077> # LOW W / MIN W
+<x> <U0078> # LOW X / MIN X
+<y> <U0079> # LOW Y / MIN Y
+<z> <U007A> # LOW Z / MIN Z
+<<(> <U007B> # LEFT CURLY BRACKET / ACCOLADE GAUCHE
+<!!> <U007C> # VERTICAL LINE / LIGNE VERTICALE
+<)/>> <U007D> # RIGHT CURLY BRACKET / ACCOLADE DROITE
+<'?> <U007E> # TILDE
+#
+# Values #x.. : IBM 850 code points / Valeurs #x.. : code IBM 850
+#
+<NS> <U00A0> #xff # NO-BREAK SPACE / ESPACE LIANT
+<!I> <U00A1> #xad # IVT EXCLAMATION MARK / POINT D'EXCLAMATION INV
+<!C> <U00A2> #xbd # CENT SIGN / SYMBOLE CENTIME
+<L-> <U00A3> #x9c # POUND SIGN / SYMBOLE LIVRE
+<Xo> <U00A4> #xcf # CURRENCY SIGN / SYMBOLE MON<E'>TAIRE
+<Y-> <U00A5> #xbe # YEN SIGN /SYMBOLE YEN
+<!B> <U00A6> #xdd # BROKEN BAR / BARRE VERTICALE INTERROMPUE
+<So> <U00A7> #xf5 # SECTION SIGN / SYMBOLE PARAGRAPHE
+<':> <U00A8> #xf9 # DIAERESIS / TR<E'>MA
+<OC> <U00A9> #xb8 # COPYRIGHT SIGN / SYMBOLE COPYRIGHT
+<-a> <U00AA> #xa6 # FEMININE OI / IO F<E'>MININ
+<<<> <U00AB> #xaf # LEFT PDAQ / GAD VERS LA GAUCHE
+<7!> <U00AC> #xaa # NOT SIGN / SIGNE N<E'>GATION
+<--> <U00AD> #xf0 # SOFT HYPHEN / TIRET VIRTUEL
+<OR> <U00AE> #xa9 # REGISTERED SIGN / SYMBOLE MARQUE D<E'>POS<E'>E
+<'-> <U00AF> #xee # MACRON
+<DG> <U00B0> #xf8 # DEGREE SIGN / SYMBOLE DEGR<E'>
+<+-> <U00B1> #xf1 # PLUS-MINUS SIGN / SIGNE PLUS OU MOINS
+<2S> <U00B2> #xfd # SUPERSCRIPT TWO / EXPOSANT DEUX
+<3S> <U00B3> #xfc # SUPERSCRIPT THREE / EXPOSANT TROIS
+<''> <U00B4> #xef # ACUTE ACCENT / ACCENT AIGU
+<My> <U00B5> #xe6 # MICRO SIGN / SYMBOLE MICRO
+<9I> <U00B6> #xf4 # PILCROW SIGN / SYMBOLE ALIN<E'>A
+<.M> <U00B7> #xfa # MIDDLE DOT / POINT M<E'>DIAN
+<',> <U00B8> #xf7 # CEDILLA / C<E'>DILLE
+<1S> <U00B9> #xfb # SUPERSCRIPT ONE / EXPOSANT UN
+<-o> <U00BA> #xa7 # MASCULINE OI / IO MASCULIN
+</>/>> <U00BB> #xae # RIGHT PDAQ / GAD VERS LA DROITE
+<14> <U00BC> #xac # VF ONE QUARTER / FO UN QUART
+<12> <U00BD> #xab # VF ONE HALF / FO UN DEMI
+<34> <U00BE> #xf3 # VF THREE QUARTERS / FO TROIS QUARTS
+<?I> <U00BF> #xa8 # IVT QUESTION MARK /POINT D'INTERROGATION INV
+<A!> <U00C0> #xb7 # CAP A WITH GRAVE / MAJ A ACCENT GRAVE
+<A'> <U00C1> #xb5 # CAP A WITH ACUTE / MAJ A ACCENT AIGU
+<A/>> <U00C2> #xb6 # CAP A WITH CIRCUMFLEX / MAJ A ACCENT CIRCONFLEXE
+<A?> <U00C3> #xc7 # CAP A WITH TILDE / MAJ A TILDE
+<A:> <U00C4> #x8e # CAP A WITH DIAERESIS / MAJ A TR<E'>MA
+<AA> <U00C5> #x8f # CAP A WITH RING ABOVE / MAJ A ROND
+<AE> <U00C6> #x92 # CAPLIG / LIGMAJ AE
+<C,> <U00C7> #x80 # CAP C WITH CEDILLA / MAJ C C<E'>DILLE
+<E!> <U00C8> #xd4 # CAP E WITH GRAVE / MAJ E ACCENT GRAVE
+<E'> <U00C9> #x90 # CAP E WITH ACUTE / MAJ E ACCENT AIGU
+<E/>> <U00CA> #xd2 # CAP E WITH CIRCUMFLEX / MAJ E ACCENT CIRCONFLEXE
+<E:> <U00CB> #xd3 # CAP E WITH DIAERESIS / MAJ E TR<E'>MA
+<I!> <U00CC> #xde # CAP I WITH GRAVE / MAJ I ACCENT GRAVE
+<I'> <U00CD> #xd6 # CAP I WITH ACUTE / MAJ I ACCENT AIGU
+<I/>> <U00CE> #xd7 # CAP I WITH CIRCUMFLEX / MAJ I ACCENT CIRCONFLEXE
+<I:> <U00CF> #xd8 # CAP I WITH DIAERESIS / MAJ I TR<E'>MA
+<D-> <U00D0> #xd1 # CAP ETH (Icelandic) / MAJ ETH (islandaise)
+<N?> <U00D1> #xa5 # CAP N WITH TILDE / MAJ N TILDE
+<O!> <U00D2> #xe3 # CAP O WITH GRAVE / MAJ O ACCENT GRAVE
+<O'> <U00D3> #xe0 # CAP O WITH ACUTE / MAJ O ACCENT AIGU
+<O/>> <U00D4> #xe2 # CAP O WITH CIRCUMFLEX / MAJ O ACCENT CIRCONFLEXE
+<O?> <U00D5> #xe5 # CAP O WITH TILDE / MAJ O TILDE
+<O:> <U00D6> #x99 # CAP O WITH DIAERESIS / MAJ O TR<E'>MA
+<*X> <U00D7> #x9e # MULTIPLICATION SIGN / SIGNE MULTIPLICATION
+<O//> <U00D8> #x9d # CAP O WITH STROKE / MAJ O BARR<E'>E EN OBLIQUE
+<U!> <U00D9> #xeb # CAP U WITH GRAVE / MAJ U ACCENT GRAVE
+<U'> <U00DA> #xe9 # CAP U WITH ACUTE / MAJ U ACCENT AIGU
+<U/>> <U00DB> #xea # CAP U WITH CIRCUMFLEX / MAJ U ACCENT CIRCONFLEXE
+<U:> <U00DC> #x9a # CAP U WITH DIAERESIS / MAJ U TR<E'>MA
+<Y'> <U00DD> #xed # CAP Y WITH ACUTE / MAJ Y ACCENT AIGU
+<TH> <U00DE> #xe8 # CAP THORN (Icelandic) / MAJ THORN (islandaise)
+<ss> <U00DF> #xe1 # LOW SHARP S (German)/ MIN S DUR (allemande)
+<a!> <U00E0> #x85 # LOW A WITH GRAVE / MIN A ACCENT GRAVE
+<a'> <U00E1> #xa0 # LOW A WITH ACUTE / MIN A ACCENT AIGU
+<a/>> <U00E2> #x83 # LOW A WITH CIRCUMFLEX / MIN A ACCENT CIRCONFLEXE
+<a?> <U00E3> #xc6 # LOW A WITH TILDE / MIN A TILDE
+<a:> <U00E4> #x84 # LOW A WITH DIAERESIS / MIN A TR<E'>MA
+<aa> <U00E5> #x86 # LOW A WITH RING ABOVE / MIN A ROND
+<ae> <U00E6> #x91 # LOWLIG / LIGMIN AE
+<c,> <U00E7> #x87 # LOW C WITH CEDILLA / MIN C C<E'>DILLE
+<e!> <U00E8> #x8a # LOW E WITH GRAVE / MIN E ACCENT GRAVE
+<e'> <U00E9> #x82 # LOW E WITH ACUTE / MIN E ACCENT AIGU
+<e/>> <U00EA> #x88 # LOW E WITH CIRCUMFLEX / MIN E ACCENT CIRCONFLEXE
+<e:> <U00EB> #x89 # LOW E WITH DIAERESIS / MIN E TR<E'>MA
+<i!> <U00EC> #x8d # LOW I WITH GRAVE / MIN I ACCENT GRAVE
+<i'> <U00ED> #xa1 # LOW I WITH ACUTE / MIN I ACCENT AIGU
+<i/>> <U00EE> #x8c # LOW I WITH CIRCUMFLEX / MIN I ACCENT CIRCONFLEXE
+<i:> <U00EF> #x8b # LOW I WITH DIAERESIS / MIN I TR<E'>MA
+<d-> <U00F0> #xd0 # LOW ETH (Icelandic) / MIN ETH (islandaise)
+<n?> <U00F1> #xa4 # LOW N WITH TILDE / MIN N TILDE
+<o!> <U00F2> #x95 # LOW O WITH GRAVE / MIN O ACCENT GRAVE
+<o'> <U00F3> #xa2 # LOW O WITH ACUTE / MIN O ACCENT AIGU
+<o/>> <U00F4> #x93 # LOW O WITH CIRCUMFLEX / MIN O ACCENT CIRCONFLEXE
+<o?> <U00F5> #xe4 # LOW O WITH TILDE / MIN O TILDE
+<o:> <U00F6> #x94 # LOW O WITH DIAERESIS / MIN O TR<E'>MA
+<-:> <U00F7> #xf6 # DIVISION SIGN / SIGNE DIVISION
+<o//> <U00F8> #x9b # LOW O WITH STROKE / MIN O BARR<E'>E EN OBLIQUE
+<u!> <U00F9> #x97 # LOW U WITH GRAVE / MIN U ACCENT GRAVE
+<u'> <U00FA> #xa3 # LOW U WITH ACUTE / MIN U ACCENT AIGU
+<u/>> <U00FB> #x96 # LOW U WITH CIRCUMFLEX / MIN U ACCENT CIRCONFLEXE
+<u:> <U00FC> #x81 # LOW U WITH DIAERESIS / MIN U TR<E'>MA
+<y:> <U00FF> #x98 # LOW Y WITH DIAERESIS / MIN Y TR<E'>MA
+<th> <U00FE> #xe7 # LOW THORN (Icelandic) / MIN THORN (islandaise)
+<y'> <U00FD> #xec # LOW Y WITH ACUTE / MIN Y ACCENT AIGU
+#
+# Par prudence les symboles suivants ont <e'>t<e'> d<e'>finis, m<e/>>me
+# s'ils ne sont pas utilis<e'>s dans les LOCALEs canadiens.
+#
+# By prudence the following symbols have been defined, even if they are not
+# referred to in the Canadian LOCALEs.
+#
+<alert> <U0007> # BELL / SONNERIE
+<backspace> <U0008> # BACKSPACE / ESPACE ARRI<E!>RE
+<tab> <U0009> # CHARACTER TABULATION / TABULATION HORIZONTALE
+<newline> <U000a> # LINE FEED / INTERLIGNE
+<vertical-tab> <U000b> # LINE TABULATION / TABULATION VERTICALE
+<form-feed> <U000c> # FORM FEED / PAGE SUIVANTE
+<carriage-return> <U000d> # CARRIAGE RETURN / RETOUR DE CHARIOT
+<space> <U0020> # SPACE / ESPACE
+<exclamation-mark> <U0021> # EXCLAMATION MARK / POINT D'EXCLAMATION
+<quotation-mark> <U0022> # QUOTATION MARK / GUILLEMET
+<number-sign> <U0023> # NUMBER SIGN / CROISILLON
+<dollar-sign> <U0024> # DOLLAR SIGN / SYMBOLE DOLLAR
+<percent-sign> <U0025> # PERCENT SIGN / SYMBOLE POURCENT
+<ampersand> <U0026> # AMPERSAND / PERLU<E!>TE
+<apostrophe> <U0027> # APOSTROPHE / APOSTROPHE
+<left-parenthesis> <U0028> # LEFT PARENTHESIS / PARENTH<E!>SE GAUCHE
+<right-parenthesis> <U0029> # RIGHT PARENTHESIS / PARENTH<E!>SE DROITE
+<asterisk> <U002a> # ASTERISK / AST<E'>RISQUE
+<plus-sign> <U002b> # PLUS SIGN / SIGNE PLUS
+<comma> <U002c> # COMMA / VIRGULE
+<hyphen> <U002d> # HYPHEN-MINUS / TRAIT D'UNION-SIGNE MOINS
+<hyphen-minus> <U002d> # HYPHEN-MINUS / TRAIT D'UNION-SIGNE MOINS
+<period> <U002e> # FULL STOP / POINT
+<full-stop> <U002e> # FULL STOP / POINT
+<slash> <U002f> # SOLIDUS / BARRE OBLIQUE
+<solidus> <U002f> # SOLIDUS / BARRE OBLIQUE
+<zero> <U0030> # DIGIT ZERO / CHIFFRE Z<E'>RO
+<one> <U0031> # DIGIT ONE / CHIFFRE UN
+<two> <U0032> # DIGIT TWO / CHIFFRE DEUX
+<three> <U0033> # DIGIT THREE / CHIFFRE TROIS
+<four> <U0034> # DIGIT FOUR / CHIFFRE QUATRE
+<five> <U0035> # DIGIT FIVE / CHIFFRE CINQ
+<six> <U0036> # DIGIT SIX / CHIFFRE SIX
+<seven> <U0037> # DIGIT SEVEN / CHIFFRE SEPT
+<eight> <U0038> # DIGIT EIGHT / CHIFFRE HUIT
+<nine> <U0039> # DIGIT NINE / CHIFFRE NEUF
+<colon> <U003a> # COLON / DEUX-POINTS
+<semicolon> <U003b> # SEMICOLON / POINT-VIRGULE
+<less-than-sign> <U003c> # LESS-THAN SIGN / SIGNE INF<E'>RIEUR <A!>
+<equals-sign> <U003d> # EQUALS SIGN / SIGNE <E'>GAL
+<greater-than-sign> <U003e> # GREATER-THAN SIGN / SIGNE SUP<E'>RIEUR <A!>
+<question-mark> <U003f> # QUESTION MARK / POINT D'INTERROGATION
+<commercial-at> <U0040> # COMMERCIAL AT / A COMMERCIAL
+<left-square-bracket> <U005b> # LEFT SQUARE BRACKET / CROCHET GAUCHE
+<backslash> <U005c> # REVERSE SOLIDUS / BARRE OBLIQUE INVERS<E'>E
+<reverse-solidus> <U005c> # REVERSE SOLIDUS / BARRE OBLIQUE INVERS<E'>E
+<right-square-bracket> <U005d> # RIGHT SQUARE BRACKET / CROCHET DROIT
+<circumflex> <U005e> # CIRCUMFLEX ACCENT / ACCENT CIRCONFLEXE
+<circumflex-accent> <U005e> # CIRCUMFLEX ACCENT / ACCENT CIRCONFLEXE
+<underscore> <U005f> # LOW LINE / TRAIT BAS
+<low-line> <U005f> # LOW LINE / TRAIT BAS
+<grave-accent> <U0060> # GRAVE ACCENT / ACCENT GRAVE
+<left-brace> <U007b> # LEFT CURLY BRACKET / ACCOLADE GAUCHE
+<left-curly-bracket> <U007b> # LEFT CURLY BRACKET / ACCOLADE GAUCHE
+<vertical-line> <U007c> # VERTICAL LINE / LIGNE VERTICALE
+<right-brace> <U007d> # RIGHT CURLY BRACKET / ACCOLADE DROITE
+<right-curly-bracket> <U007d> # RIGHT CURLY BRACKET / ACCOLADE DROITE
+<tilde> <U007e> # TILDE
+#
+# Les caract<e!>res suivants constituent une extension de l'alphabet latin no 1
+# qui couvre <a!> la fois le jeu graphique suppl<e'>mentaire <a!> usage
+# g<e'>n<e'>ral de la norme CAN/CSA Z243.4 et les caract<e!>res du NAPLPS,
+# correspondant <a!> peu de choses pr<e!>s <a!> la norme ISO/C<E'>I 6937.
+#
+# The following characters constitute an extension of Latin Alphabet No. 1
+# which covers both the general purpose graphic supplementary set of the
+# CAN/CSA Z243.4 standard and the characters used in NAPLPS, which themselves
+# correspond roughly to those of ISO/IEC 6937 standard.
+#
+<A-> <U0100>
+<a-> <U0101>
+<A(> <U0102>
+<a(> <U0103>
+<A;> <U0104>
+<a;> <U0105>
+<C'> <U0106>
+<c'> <U0107>
+<C/>> <U0108>
+<c/>> <U0109>
+<C.> <U010A>
+<c.> <U010B>
+<C<> <U010C>
+<c<> <U010D>
+<D<> <U010E>
+<d<> <U010F>
+<D//> <U0110>
+<d//> <U0111>
+<E-> <U0112>
+<e-> <U0113>
+<E.> <U0116>
+<e.> <U0117>
+<E;> <U0118>
+<e;> <U0119>
+<E<> <U011A>
+<e<> <U011B>
+<G/>> <U011C>
+<g/>> <U011D>
+<G(> <U011E>
+<g(> <U011F>
+<G.> <U0120>
+<g.> <U0121>
+<G,> <U0122>
+<g,> <U0123>
+<H/>> <U0124>
+<h/>> <U0125>
+<H//> <U0126>
+<h//> <U0127>
+<I?> <U0128>
+<i?> <U0129>
+<I-> <U012A>
+<i-> <U012B>
+<I;> <U012E>
+<i;> <U012F>
+<I.> <U0130>
+<i.> <U0131>
+<IJ> <U0132>
+<ij> <U0133>
+<J/>> <U0134>
+<j/>> <U0135>
+<K,> <U0136>
+<k,> <U0137>
+<kk> <U0138>
+<L'> <U0139>
+<l'> <U013A>
+<L,> <U013B>
+<l,> <U013C>
+<L<> <U013D>
+<l<> <U013E>
+<L.> <U013F>
+<l.> <U0140>
+<L//> <U0141>
+<l//> <U0142>
+<N'> <U0143>
+<n'> <U0144>
+<N,> <U0145>
+<n,> <U0146>
+<N<> <U0147>
+<n<> <U0148>
+<'n> <U0149>
+<NG> <U014A>
+<ng> <U014B>
+<O-> <U014C>
+<o-> <U014D>
+<O"> <U0150>
+<o"> <U0151>
+<OE> <U0152>
+<oe> <U0153>
+<R'> <U0154>
+<r'> <U0155>
+<R,> <U0156>
+<r,> <U0157>
+<R<> <U0158>
+<r<> <U0159>
+<S'> <U015A>
+<s'> <U015B>
+<S/>> <U015C>
+<s/>> <U015D>
+<S,> <U015E>
+<s,> <U015F>
+<S<> <U0160>
+<s<> <U0161>
+<T,> <U0162>
+<t,> <U0163>
+<T<> <U0164>
+<t<> <U0165>
+<T//> <U0166>
+<t//> <U0167>
+<U?> <U0168>
+<u?> <U0169>
+<U-> <U016A>
+<u-> <U016B>
+<U(> <U016C>
+<u(> <U016D>
+<U0> <U016E>
+<u0> <U016F>
+<U"> <U0170>
+<u"> <U0171>
+<U;> <U0172>
+<u;> <U0173>
+<W/>> <U0174>
+<w/>> <U0175>
+<Y/>> <U0176>
+<y/>> <U0177>
+<Y:> <U0178>
+<Z'> <U0179>
+<z'> <U017A>
+<Z.> <U017B>
+<z.> <U017C>
+<Z<> <U017D>
+<z<> <U017E>
+<"(> <U0306>
+<".> <U0307>
+<"0> <U030A>
+<""> <U030B>
+<"<> <U030C>
+<";> <U0328>
+<"_> <U0332>
+<"//> <U0338>
+<'6> <U2018>
+<'9> <U2019>
+<"6> <U201C>
+<"9> <U201D>
+<0S> <U2070>
+<4S> <U2074>
+<5S> <U2075>
+<6S> <U2076>
+<7S> <U2077>
+<8S> <U2078>
+<9S> <U2079>
+<+S> <U207A>
+<-S> <U207B>
+<(S> <U207D>
+<)S> <U207E>
+<"7> <U20D1>
+<TM> <U2122>
+<Om> <U2126>
+<18> <U215B>
+<38> <U215C>
+<58> <U215D>
+<78> <U215E>
+<<-> <U2190>
+<-!> <U2191>
+<-/>> <U2192>
+<-v> <U2193>
+<FP> <U220E>
+<!=> <U2260>
+<=<> <U2264>
+</>=> <U2265>
+<_-> <U2500>
+<_=> <U2501>
+<_!> <U2502>
+<_V/>> <U250C>
+<_V<w> <U2510>
+<_A/>> <U2514>
+<_A<> <U2518>
+<_!/>> <U251C>
+<_!<> <U2524>
+<_V-> <U252C>
+<_-A> <U2534>
+<_!-> <U253C>
+<_/>//> <U2571>
+<_<\> <U2572>
+<_./>//> <U25E2>
+<_.<\> <U25E3>
+<_d!> <U266A>
+END CHARIDS
diff --git a/localedata/mnemonic.ds b/localedata/repertoiremaps/mnemonic.ds
index abeab6b..fb70c3b 100644
--- a/localedata/mnemonic.ds
+++ b/localedata/repertoiremaps/mnemonic.ds
@@ -1,4 +1,4 @@
-escape_char /
+<escape_char> /
<NUL> <U0000> NULL (NUL)
<SOH> <U0001> START OF HEADING (SOH)
<STX> <U0002> START OF TEXT (STX)
@@ -33,7 +33,7 @@ escape_char /
<DEL> <U007F> DELETE (DEL)
<space> <U0020> SPACE
<exclamation-mark> <U0021> EXCLAMATION MARK
-<quotation-mark> <U0022> QUOTATION MARK
+<quotation-mark> <U0022> QUOTATION MARK
<number-sign> <U0023> NUMBER SIGN
<dollar-sign> <U0024> DOLLAR SIGN
<percent-sign> <U0025> PERCENT SIGN
@@ -353,8 +353,8 @@ escape_char /
<o-> <U014d> LATIN SMALL LETTER O WITH MACRON
<O(> <U014e> LATIN CAPITAL LETTER O WITH BREVE
<o(> <U014f> LATIN SMALL LETTER O WITH BREVE
-<O"> <U0150> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-<o"> <U0151> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+<O"> <U0150> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+<o"> <U0151> LATIN SMALL LETTER O WITH DOUBLE ACUTE
<OE> <U0152> LATIN CAPITAL LIGATURE OE
<oe> <U0153> LATIN SMALL LIGATURE OE
<R'> <U0154> LATIN CAPITAL LETTER R WITH ACUTE
@@ -385,8 +385,8 @@ escape_char /
<u(> <U016d> LATIN SMALL LETTER U WITH BREVE
<U0> <U016e> LATIN CAPITAL LETTER U WITH RING ABOVE
<u0> <U016f> LATIN SMALL LETTER U WITH RING ABOVE
-<U"> <U0170> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-<u"> <U0171> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+<U"> <U0170> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+<u"> <U0171> LATIN SMALL LETTER U WITH DOUBLE ACUTE
<U;> <U0172> LATIN CAPITAL LETTER U WITH OGONEK
<u;> <U0173> LATIN SMALL LETTER U WITH OGONEK
<W/>> <U0174> LATIN CAPITAL LETTER W WITH CIRCUMFLEX
@@ -1012,10 +1012,10 @@ escape_char /
<?:> <U1fc1> GREEK DIALYTIKA AND PERISPOMENI
<,!> <U1fcd> GREEK PSILI AND VARIA
<,'> <U1fce> GREEK PSILI AND OXIA
-<?,> <U1fcf> GREEK PSILI AND PERISPOMENI
+<?,> <U1fcf> GREEK PSILI AND PERISPOMENI
<;!> <U1fdd> GREEK DASIA AND VARIA
<;'> <U1fde> GREEK DASIA AND OXIA
-<?;> <U1fdf> GREEK DASIA AND PERISPOMENI
+<?;> <U1fdf> GREEK DASIA AND PERISPOMENI
<!:> <U1fed> GREEK DIALYTIKA AND VARIA
<!*> <U1fef> GREEK VARIA
<;;> <U1ffe> GREEK DASIA
@@ -1060,31 +1060,31 @@ escape_char /
<:X> <U203b> REFERENCE MARK
<!*2> <U203c> DOUBLE EXCLAMATION MARK
<'-> <U203e> OVERLINE
-<0S> <U2070> SUPERSCRIPT ZERO
-<4S> <U2074> SUPERSCRIPT FOUR
-<5S> <U2075> SUPERSCRIPT FIVE
-<6S> <U2076> SUPERSCRIPT SIX
-<7S> <U2077> SUPERSCRIPT SEVEN
-<8S> <U2078> SUPERSCRIPT EIGHT
-<9S> <U2079> SUPERSCRIPT NINE
+<0S> <U2070> SUPERSCRIPT ZERO
+<4S> <U2074> SUPERSCRIPT FOUR
+<5S> <U2075> SUPERSCRIPT FIVE
+<6S> <U2076> SUPERSCRIPT SIX
+<7S> <U2077> SUPERSCRIPT SEVEN
+<8S> <U2078> SUPERSCRIPT EIGHT
+<9S> <U2079> SUPERSCRIPT NINE
<+S> <U207a> SUPERSCRIPT PLUS SIGN
-<-S> <U207b> SUPERSCRIPT MINUS
+<-S> <U207b> SUPERSCRIPT MINUS
<=S> <U207c> SUPERSCRIPT EQUALS SIGN
<(S> <U207d> SUPERSCRIPT LEFT PARENTHESIS
<)S> <U207e> SUPERSCRIPT RIGHT PARENTHESIS
<nS> <U207f> SUPERSCRIPT LATIN SMALL LETTER N
-<0s> <U2080> SUBSCRIPT ZERO
-<1s> <U2081> SUBSCRIPT ONE
-<2s> <U2082> SUBSCRIPT TWO
-<3s> <U2083> SUBSCRIPT THREE
-<4s> <U2084> SUBSCRIPT FOUR
-<5s> <U2085> SUBSCRIPT FIVE
-<6s> <U2086> SUBSCRIPT SIX
-<7s> <U2087> SUBSCRIPT SEVEN
-<8s> <U2088> SUBSCRIPT EIGHT
-<9s> <U2089> SUBSCRIPT NINE
+<0s> <U2080> SUBSCRIPT ZERO
+<1s> <U2081> SUBSCRIPT ONE
+<2s> <U2082> SUBSCRIPT TWO
+<3s> <U2083> SUBSCRIPT THREE
+<4s> <U2084> SUBSCRIPT FOUR
+<5s> <U2085> SUBSCRIPT FIVE
+<6s> <U2086> SUBSCRIPT SIX
+<7s> <U2087> SUBSCRIPT SEVEN
+<8s> <U2088> SUBSCRIPT EIGHT
+<9s> <U2089> SUBSCRIPT NINE
<+s> <U208a> SUBSCRIPT PLUS SIGN
-<-s> <U208b> SUBSCRIPT MINUS
+<-s> <U208b> SUBSCRIPT MINUS
<=s> <U208c> SUBSCRIPT EQUALS SIGN
<(s> <U208d> SUBSCRIPT LEFT PARENTHESIS
<)s> <U208e> SUBSCRIPT RIGHT PARENTHESIS
@@ -1197,7 +1197,7 @@ escape_char /
<.:> <U2234> THEREFORE
<:.> <U2235> BECAUSE
<:R> <U2236> RATIO
-<::> <U2237> PROPORTION
+<::> <U2237> PROPORTION
<?1> <U223c> TILDE OPERATOR
<CG> <U223e> INVERTED LAZY S
<?-> <U2243> ASYMPTOTICALLY EQUAL TO
@@ -1242,7 +1242,7 @@ escape_char /
<1j> <U2446> OCR BRANCH BANK IDENTIFICATION
<2j> <U2447> OCR AMOUNT OF CHECK
<3j> <U2448> OCR DASH
-<4j> <U2449> OCR CUSTOMER ACCOUNT NUMBER
+<4j> <U2449> OCR CUSTOMER ACCOUNT NUMBER
<1-o> <U2460> CIRCLED DIGIT ONE
<2-o> <U2461> CIRCLED DIGIT TWO
<3-o> <U2462> CIRCLED DIGIT THREE
@@ -1426,7 +1426,7 @@ escape_char /
<UdL> <U2529> BOX DRAWINGS DOWN LIGHT AND LEFT UP HEAVY
<uDL> <U252a> BOX DRAWINGS UP LIGHT AND LEFT DOWN HEAVY
<VL> <U252b> BOX DRAWINGS HEAVY VERTICAL AND LEFT
-<dh> <U252c> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+<dh> <U252c> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
<dLr> <U252d> BOX DRAWINGS LEFT HEAVY AND RIGHT DOWN LIGHT
<dlR> <U252e> BOX DRAWINGS RIGHT HEAVY AND LEFT DOWN LIGHT
<dH> <U252f> BOX DRAWINGS DOWN LIGHT AND HORIZONTAL HEAVY
@@ -1486,7 +1486,7 @@ escape_char /
<Tr> <U25b7> WHITE RIGHT-POINTING TRIANGLE
<PR> <U25ba> BLACK RIGHT-POINTING POINTER
<Dt> <U25bc> BLACK DOWN-POINTING TRIANGLE
-<dT> <U25bd> WHITE DOWN-POINTING TRIANGLE
+<dT> <U25bd> WHITE DOWN-POINTING TRIANGLE
<Tl> <U25c1> WHITE LEFT-POINTING TRIANGLE
<PL> <U25c4> BLACK LEFT-POINTING POINTER
<Db> <U25c6> BLACK DIAMOND
@@ -1939,29 +1939,3 @@ escape_char /
<OC> <U009d> OPERATING SYSTEM COMMAND (OSC)
<PM> <U009e> PRIVACY MESSAGE (PM)
<AC> <U009f> APPLICATION PROGRAM COMMAND (APC)
-<"3> <ISO-IR-53_C9> NON-SPACING UMLAUT (not a real character)
-<"1> <ISO-IR-70_C0> NON-SPACING DIAERESIS WITH ACCENT (not a real character)
-<"!> <ISO-IR-103_C1> NON-SPACING GRAVE ACCENT (not a real character)
-<"'> <ISO-IR-103_C2> NON-SPACING ACUTE ACCENT (not a real character)
-<"/>> <ISO-IR-103_C3> NON-SPACING CIRCUMFLEX ACCENT (not a real character)
-<"?> <ISO-IR-103_C4> NON-SPACING TILDE (not a real character)
-<"-> <ISO-IR-103_C5> NON-SPACING MACRON (not a real character)
-<"(> <ISO-IR-103_C6> NON-SPACING BREVE (not a real character)
-<".> <ISO-IR-103_C7> NON-SPACING DOT ABOVE (not a real character)
-<":> <ISO-IR-103_C8> NON-SPACING DIAERESIS (not a real character)
-<"0> <ISO-IR-103_CA> NON-SPACING RING ABOVE (not a real character)
-<",> <ISO-IR-103_CB> NON-SPACING CEDILLA (not a real character)
-<"_> <ISO-IR-103_CC> NON-SPACING LOW LINE (not a real character)
-<""> <ISO-IR-103_CD> NON-SPACING DOUBLE ACCUTE ACCENT (not a real character)
-<";> <ISO-IR-103_CE> NON-SPACING OGONEK (not a real character)
-<"<> <ISO-IR-103_CF> NON-SPACING CARON (not a real character)
-<"=> <ISO-IR-38_D9> NON-SPACING DOUBLE LOW LINE (not a real character)
-<"//> <ISO-IR-128_C9> NON-SPACING LONG SOLIDUS OVERLAY (not a real character)
-<"p> <ISO-IR-55_25> GREEK NON-SPACING PSILI PNEUMATA (not a real character)
-<"d> <ISO-IR-55_26> GREEK NON-SPACING DASIA PNEUMATA (not a real character)
-<"i> <ISO-IR-55_27> GREEK NON-SPACING IOTA BELOW (not a real character)
-<+_> <ISO-IR-87_2138> IDEOGRAPHIC DITTO MARK
-<a+:> <IBM868_90> ARABIC LETTER ALEF FINAL FORM COMPATIBILITY
-<Tel> <ISO-IR-149_2265> TEL COMPATIBILITY SIGN
-<UA> <ISO-IR-8-1_40> Unit space A
-<UB> <ISO-IR-8-1_60> Unit space B
diff --git a/sysdeps/arm/memset.S b/sysdeps/arm/memset.S
index a986d68..567cc39 100644
--- a/sysdeps/arm/memset.S
+++ b/sysdeps/arm/memset.S
@@ -63,6 +63,5 @@ ENTRY(memset)
strb a2, [a4], $1
strb a2, [a4], $1
strb a2, [a4], $1
- strb a2, [a4], $1
RETINSTR(mov,pc,lr)
END(memset)
diff --git a/sysdeps/unix/arm/start.c b/sysdeps/unix/arm/start.c
new file mode 100644
index 0000000..7723847
--- /dev/null
+++ b/sysdeps/unix/arm/start.c
@@ -0,0 +1,85 @@
+/* Special startup code for ARM a.out binaries.
+ Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sysdep.h>
+
+/* The first piece of initialized data. */
+int __data_start = 0;
+#ifdef HAVE_WEAK_SYMBOLS
+weak_alias (__data_start, data_start)
+#endif
+
+#ifndef errno
+volatile int __errno;
+strong_alias (__errno, errno)
+#endif
+
+extern void __libc_init __P ((int argc, char **argv, char **envp));
+extern int main __P ((int argc, char **argv, char **envp));
+
+/* N.B.: It is important that this be the first function.
+ This file is the first thing in the text section. */
+
+/* If this was in C it might create its own stack frame and
+ screw up the arguments. */
+#ifdef NO_UNDERSCORES
+asm (".text; .globl _start; _start: B start1");
+#else
+asm (".text; .globl __start; __start: B _start1");
+
+/* Make an alias called `start' (no leading underscore, so it can't
+ conflict with C symbols) for `_start'. This is the name vendor crt0.o's
+ tend to use, and thus the name most linkers expect. */
+asm (".set start, __start");
+#endif
+
+/* Fool gcc into thinking that more args are passed. This makes it look
+ on the stack (correctly) for the real arguments. It causes somewhat
+ strange register usage in start1(), but we aren't too bothered about
+ that at the moment. */
+#define DUMMIES a1, a2, a3, a4
+
+#ifdef DUMMIES
+#define ARG_DUMMIES DUMMIES,
+#define DECL_DUMMIES int DUMMIES;
+#else
+#define ARG_DUMMIES
+#define DECL_DUMMIES
+#endif
+
+/* ARGSUSED */
+static void
+start1 (ARG_DUMMIES argc, argv, envp)
+ DECL_DUMMIES
+ int argc;
+ char **argv;
+ char **envp;
+{
+ /* Publish the environment pointer through the global __environ. */
+ __environ = envp;
+
+ /* Run the C library initialization before any user code is called. */
+ __libc_init (argc, argv, __environ);
+
+ /* Run main and hand its return value to exit as the process status. */
+ exit (main (argc, argv, __environ));
+}
diff --git a/sysdeps/unix/sysv/linux/arm/mmap.S b/sysdeps/unix/sysv/linux/arm/mmap.S
new file mode 100644
index 0000000..905303e
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/arm/mmap.S
@@ -0,0 +1,39 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+ .text
+
+ENTRY (__mmap)
+
+ /* Because we can only get five args through the syscall interface, and
+ mmap() takes six, we need to build a parameter block and pass its
+ address instead. The 386 port does a similar trick. */
+
+ mov ip, sp
+ stmdb ip!, {a1-a4}
+ mov a1, ip
+ swi SYS_ify (mmap)
+ cmn r0, $4096
+ bhs PLTJMP(syscall_error);
+ ret
+
+PSEUDO_END (__mmap)
+
+weak_alias (__mmap, mmap)
diff --git a/sysdeps/unix/sysv/linux/arm/sysdep.h b/sysdeps/unix/sysv/linux/arm/sysdep.h
index 8a5111f..14dd17c 100644
--- a/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -53,7 +53,7 @@
ENTRY (name) \
DO_CALL (args, syscall_name); \
cmn r0, $4096; \
- bhs PLTJMP(syscall_error);
+ bhs PLTJMP(C_SYMBOL_NAME(__syscall_error));
#undef PSEUDO_END
#define PSEUDO_END(name) \