aboutsummaryrefslogtreecommitdiff
path: root/locale
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2000-04-07 02:38:44 +0000
committer Ulrich Drepper <drepper@redhat.com> 2000-04-07 02:38:44 +0000
commit a0dc52061fd8d47c37bbd363533ae0a4307e507c (patch)
tree 7b44e3c280a8c39bc7f4c13b426eee8a40dda66d /locale
parent 372f94ba46f2b64aa2d99085112816789db3fe41 (diff)
download glibc-a0dc52061fd8d47c37bbd363533ae0a4307e507c.zip
glibc-a0dc52061fd8d47c37bbd363533ae0a4307e507c.tar.gz
glibc-a0dc52061fd8d47c37bbd363533ae0a4307e507c.tar.bz2
Update.
2000-04-06  Ulrich Drepper  <drepper@redhat.com>

    * locale/programs/charmap.c (charmap_new_char): Add parameter step.
    Support ..(2).. ellipsis.
    (parse_charmap): Recognize ..(2).. etc and pass step down.
    Correctly generate names for UCS4 characters.
    * locale/programs/ld-ctype.c (struct translit_ignore_t): Add step.
    (ctype_finish): We know the wide character value for <SP>, don't
    search.
    (charclass_symbolic_ellipsis): Handle ..(2).. ellipsis.
    (charclass_ucs4_ellipsis): Likewise.
    (read_translit_ignore_entry): Store ellipsis step.
    (ctype_read): Recognize ..(2).. etc and pass step down.
    * locale/programs/linereader.c (lr_token): When seeing comment
    character ignore only rest of line in sources but stop at escaped
    newline.  Recognize ..(2).. and ....(2)....
    * locale/programs/locfile-token.h (enum token_t): Add tok_ellipsis2_2
    and tok_ellipsis4_2.
Diffstat (limited to 'locale')
-rw-r--r-- locale/programs/charmap.c 32
-rw-r--r-- locale/programs/ld-ctype.c 69
-rw-r--r-- locale/programs/linereader.c 26
-rw-r--r-- locale/programs/locfile-token.h 4
4 files changed, 98 insertions, 33 deletions
diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c
index 37047a0..3ccebca 100644
--- a/locale/programs/charmap.c
+++ b/locale/programs/charmap.c
@@ -53,7 +53,7 @@ static void new_width (struct linereader *cmfile, struct charmap_t *result,
unsigned long int width);
static void charmap_new_char (struct linereader *lr, struct charmap_t *cm,
int nbytes, char *bytes, const char *from,
- const char *to, int decimal_ellipsis);
+ const char *to, int decimal_ellipsis, int step);
struct charmap_t *
@@ -225,6 +225,7 @@ parse_charmap (struct linereader *cmfile)
char *from_name = NULL;
char *to_name = NULL;
enum token_t ellipsis = 0;
+ int step = 1;
/* We don't want symbolic names in string to be translated. */
cmfile->translate_strings = 0;
@@ -461,7 +462,7 @@ character sets with locking states are not supported"));
now->val.str.lenmb);
else
{
- obstack_printf (&result->mem_pool, "<%08X>",
+ obstack_printf (&result->mem_pool, "U%08X",
cmfile->token.val.ucs4);
obstack_1grow (&result->mem_pool, '\0');
from_name = (char *) obstack_finish (&result->mem_pool);
@@ -475,9 +476,20 @@ character sets with locking states are not supported"));
/* We have two possibilities: We can see an ellipsis or an
encoding value. */
if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
- || nowtok == tok_ellipsis2)
+ || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2
+ || nowtok == tok_ellipsis2_2)
{
ellipsis = nowtok;
+ if (nowtok == tok_ellipsis4_2)
+ {
+ step = 2;
+ nowtok = tok_ellipsis4;
+ }
+ else if (nowtok == tok_ellipsis2_2)
+ {
+ step = 2;
+ nowtok = tok_ellipsis2;
+ }
state = 4;
continue;
}
@@ -502,13 +514,15 @@ character sets with locking states are not supported"));
else
charmap_new_char (cmfile, result, now->val.charcode.nbytes,
now->val.charcode.bytes, from_name, to_name,
- ellipsis != tok_ellipsis2);
+ ellipsis != tok_ellipsis2, step);
/* Ignore trailing comment silently. */
lr_ignore_rest (cmfile, 0);
from_name = NULL;
to_name = NULL;
+ ellipsis = tok_none;
+ step = 1;
state = 2;
continue;
@@ -531,7 +545,7 @@ character sets with locking states are not supported"));
cmfile->token.val.str.lenmb);
else
{
- obstack_printf (&result->mem_pool, "<%08X>",
+ obstack_printf (&result->mem_pool, "U%08X",
cmfile->token.val.ucs4);
obstack_1grow (&result->mem_pool, '\0');
to_name = (char *) obstack_finish (&result->mem_pool);
@@ -814,7 +828,7 @@ charmap_find_value (const struct charmap_t *cm, const char *name, size_t len)
static void
charmap_new_char (struct linereader *lr, struct charmap_t *cm,
int nbytes, char *bytes, const char *from, const char *to,
- int decimal_ellipsis)
+ int decimal_ellipsis, int step)
{
hash_table *ht = &cm->char_table;
hash_table *bt = &cm->byte_table;
@@ -833,7 +847,7 @@ charmap_new_char (struct linereader *lr, struct charmap_t *cm,
newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
newp->nbytes = nbytes;
memcpy (newp->bytes, bytes, nbytes);
- newp->name = obstack_copy (ob, from, len1 + 1);
+ newp->name = from;
newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9))
@@ -852,7 +866,7 @@ charmap_new_char (struct linereader *lr, struct charmap_t *cm,
char *endp;
errno = 0;
- newp->ucs4 = strtoul (from, &endp, 16);
+ newp->ucs4 = strtoul (from + 1, &endp, 16);
if (endp - from != len1
|| (newp->ucs4 == ULONG_MAX && errno == ERANGE)
|| newp->ucs4 >= 0x80000000)
@@ -916,7 +930,7 @@ hexadecimal range format should use only capital characters"));
return;
}
- for (cnt = from_nr; cnt <= to_nr; ++cnt)
+ for (cnt = from_nr; cnt <= to_nr; cnt += step)
{
char *name_end;
obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X",
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index 23ca236..9cf4d2c 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -97,6 +97,7 @@ struct translit_ignore_t
{
uint32_t from;
uint32_t to;
+ uint32_t step;
const char *fname;
size_t lineno;
@@ -504,18 +505,13 @@ character '%s' in class `%s' must not be in class `%s'"),
}
/* ... and now test <SP> as a special case. */
- space_value = repertoire_find_value (ctype->repertoire, "SP", 2);
- if (space_value == ILLEGAL_CHAR_VALUE)
- {
- if (!be_quiet)
- error (0, 0, _("character <SP> not defined in character map"));
- }
- else if (((cnt = BITPOS (tok_space),
- (ELEM (ctype, class_collection, , space_value)
- & BITw (tok_space)) == 0)
- || (cnt = BITPOS (tok_blank),
- (ELEM (ctype, class_collection, , space_value)
- & BITw (tok_blank)) == 0)))
+ space_value = 32;
+ if (((cnt = BITPOS (tok_space),
+ (ELEM (ctype, class_collection, , space_value)
+ & BITw (tok_space)) == 0)
+ || (cnt = BITPOS (tok_blank),
+ (ELEM (ctype, class_collection, , space_value)
+ & BITw (tok_blank)) == 0)))
{
if (!be_quiet)
error (0, 0, _("<SP> character not in class `%s'"),
@@ -1236,7 +1232,8 @@ get_character (struct token *now, struct charmap_t *charmap,
}
-/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'. */
+/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
+ the .(2). counterparts. */
static void
charclass_symbolic_ellipsis (struct linereader *ldfile,
struct locale_ctype_t *ctype,
@@ -1246,7 +1243,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile,
const char *last_str,
unsigned long int class256_bit,
unsigned long int class_bit, int base,
- int ignore_content, int handle_digits)
+ int ignore_content, int handle_digits, int step)
{
const char *nowstr = now->val.str.startmb;
char tmp[now->val.str.lenmb + 1];
@@ -1288,7 +1285,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile,
if (!ignore_content)
{
now->val.str.startmb = tmp;
- while (++from <= to)
+ while ((from += step) <= to)
{
struct charseq *seq;
uint32_t wch;
@@ -1346,7 +1343,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile,
}
-/* Ellipsis like in `<U1234>..<U2345>'. */
+/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
static void
charclass_ucs4_ellipsis (struct linereader *ldfile,
struct locale_ctype_t *ctype,
@@ -1355,7 +1352,7 @@ charclass_ucs4_ellipsis (struct linereader *ldfile,
struct token *now, uint32_t last_wch,
unsigned long int class256_bit,
unsigned long int class_bit, int ignore_content,
- int handle_digits)
+ int handle_digits, int step)
{
if (last_wch > now->val.ucs4)
{
@@ -1367,7 +1364,7 @@ to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
}
if (!ignore_content)
- while (++last_wch <= now->val.ucs4)
+ while ((last_wch += step) <= now->val.ucs4)
{
/* We have to find out whether there is a byte sequence corresponding
to this UCS4 value. */
@@ -1376,6 +1373,11 @@ to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
seq = charmap_find_value (charmap, utmp, 9);
+ if (seq == NULL)
+ {
+ snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
+ seq = charmap_find_value (charmap, utmp, 5);
+ }
if (seq == NULL)
/* Try looking in the repertoire map. */
@@ -1779,6 +1781,7 @@ read_translit_ignore_entry (struct linereader *ldfile,
obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
newp->from = from;
newp->to = from;
+ newp->step = 1;
newp->next = ctype->translit_ignore;
ctype->translit_ignore = newp;
@@ -1788,11 +1791,12 @@ read_translit_ignore_entry (struct linereader *ldfile,
line. */
now = lr_token (ldfile, charmap, repertoire);
- if (now->tok == tok_ellipsis2)
+ if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
{
/* XXX Should we bother implementing `....'? `...' certainly
will not be implemented. */
uint32_t to;
+ int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
now = lr_token (ldfile, charmap, repertoire);
@@ -1823,7 +1827,10 @@ read_translit_ignore_entry (struct linereader *ldfile,
{
/* Make sure the `to'-value is larger. */
if (to >= from)
- newp->to = to;
+ {
+ newp->to = to;
+ newp->step = step;
+ }
else
lr_error (ldfile, _("\
to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
@@ -1866,6 +1873,7 @@ ctype_read (struct linereader *ldfile, struct localedef_t *result,
uint32_t last_wch = 0;
enum token_t last_token;
enum token_t ellipsis_token;
+ int step;
char last_charcode[16];
size_t last_charcode_len = 0;
const char *last_str = NULL;
@@ -2040,6 +2048,7 @@ ctype_read (struct linereader *ldfile, struct localedef_t *result,
ctype->class_done |= class_bit;
last_token = tok_none;
ellipsis_token = tok_none;
+ step = 1;
now = lr_token (ldfile, charmap, NULL);
while (now->tok != tok_eol && now->tok != tok_eof)
{
@@ -2140,7 +2149,7 @@ the absolute ellipsis `...' must not be used"));
== tok_ellipsis4
? 10 : 16),
ignore_content,
- handle_digits);
+ handle_digits, step);
}
else if (last_token == tok_ucs4)
{
@@ -2151,7 +2160,8 @@ with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
charclass_ucs4_ellipsis (ldfile, ctype, charmap,
repertoire, now, last_wch,
class256_bit, class_bit,
- ignore_content, handle_digits);
+ ignore_content, handle_digits,
+ step);
}
else
{
@@ -2180,9 +2190,21 @@ with character code range values one must use the absolute ellipsis `...'"));
break;
if (last_token != tok_none
- && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4)
+ && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
{
+ if (now->tok == tok_ellipsis2_2)
+ {
+ now->tok = tok_ellipsis2;
+ step = 2;
+ }
+ else if (now->tok == tok_ellipsis4_2)
+ {
+ now->tok = tok_ellipsis4;
+ step = 2;
+ }
+
ellipsis_token = now->tok;
+
now = lr_token (ldfile, charmap, NULL);
continue;
}
@@ -2194,6 +2216,7 @@ with character code range values one must use the absolute ellipsis `...'"));
now = lr_token (ldfile, charmap, NULL);
ellipsis_token = tok_none;
+ step = 1;
}
break;
diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c
index f6532a4..36dd0cd 100644
--- a/locale/programs/linereader.c
+++ b/locale/programs/linereader.c
@@ -185,6 +185,16 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap,
if (ch != lr->comment_char)
break;
+ /* Is there a newline at the end of the buffer? */
+ if (lr->buf[lr->bufact - 1] != '\n')
+ {
+ /* No. Some people want this to mean that only the line in
+ the file not the logical, concatenated line is ignored.
+ Let's try this. */
+ lr->idx = lr->bufact;
+ continue;
+ }
+
/* Ignore rest of line. */
lr_ignore_rest (lr, 0);
lr->token.tok = tok_eol;
@@ -198,6 +208,14 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap,
/* Match ellipsis. */
if (ch == '.')
{
+ if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
+ {
+ int cnt;
+ for (cnt = 0; cnt < 10; ++cnt)
+ lr_getc (lr);
+ lr->token.tok = tok_ellipsis4_2;
+ return &lr->token;
+ }
if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
{
lr_getc (lr);
@@ -213,6 +231,14 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap,
lr->token.tok = tok_ellipsis3;
return &lr->token;
}
+ if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
+ {
+ int cnt;
+ for (cnt = 0; cnt < 6; ++cnt)
+ lr_getc (lr);
+ lr->token.tok = tok_ellipsis2_2;
+ return &lr->token;
+ }
if (lr->buf[lr->idx] == '.')
{
lr_getc (lr);
diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
index e1cd5f7..6eecc56 100644
--- a/locale/programs/locfile-token.h
+++ b/locale/programs/locfile-token.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
@@ -31,6 +31,8 @@ enum token_t
tok_ellipsis2,
tok_ellipsis3,
tok_ellipsis4,
+ tok_ellipsis2_2,
+ tok_ellipsis4_2,
tok_semicolon,
tok_comma,
tok_open_brace,