diff options
author | Dave Brolley <brolley@cygnus.com> | 1998-07-20 13:36:43 +0000 |
---|---|---|
committer | Dave Brolley <brolley@gcc.gnu.org> | 1998-07-20 09:36:43 -0400 |
commit | 0d3ba7398c61d69c79d74da37406192d96432b25 (patch) | |
tree | 144e96513b88da4a8afe4628c0de23e84d53d36c | |
parent | 56f48ce9765aa2b6d4742a4923fee581a12c1418 (diff) | |
download | gcc-0d3ba7398c61d69c79d74da37406192d96432b25.zip gcc-0d3ba7398c61d69c79d74da37406192d96432b25.tar.gz gcc-0d3ba7398c61d69c79d74da37406192d96432b25.tar.bz2 |
lex.c (mbchar.h): #include it.
1998-07-20 Dave Brolley <brolley@cygnus.com>
* lex.c (mbchar.h): #include it.
(GET_ENVIRONMENT): New macro.
(init_parse): Set character set based on LANG environment variable.
(real_yylex): Handle multibyte characters in character literals.
(real_yylex): Handle multibyte characters in string literals.
From-SVN: r21304
-rw-r--r-- | gcc/cp/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/cp/Makefile.in | 2 | ||||
-rw-r--r-- | gcc/cp/lex.c | 287 |
3 files changed, 197 insertions, 100 deletions
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index bd3bd68..bcfaec9 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,11 @@ +1998-07-20 Dave Brolley <brolley@cygnus.com> + + * lex.c (mbchar.h): #include it. + (GET_ENVIRONMENT): New macro. + (init_parse): Set character set based on LANG environment variable. + (real_yylex): Handle multibyte characters in character literals. + (real_yylex): Handle multibyte characters in string literals. + 1998-07-19 Jason Merrill <jason@yorick.cygnus.com> * lex.c (do_identifier): Look for class value even if we don't diff --git a/gcc/cp/Makefile.in b/gcc/cp/Makefile.in index 2b437eb..0e176ab 100644 --- a/gcc/cp/Makefile.in +++ b/gcc/cp/Makefile.in @@ -244,7 +244,7 @@ spew.o : spew.c $(CONFIG_H) $(CXX_TREE_H) \ lex.o : lex.c $(CONFIG_H) $(CXX_TREE_H) \ $(PARSE_H) input.c $(srcdir)/../flags.h hash.h lex.h \ $(srcdir)/../c-pragma.h $(srcdir)/../system.h $(srcdir)/../toplev.h \ - $(srcdir)/../output.h + $(srcdir)/../output.h $(srcdir)/../mbchar.h decl.o : decl.c $(CONFIG_H) $(CXX_TREE_H) $(srcdir)/../flags.h \ lex.h decl.h $(srcdir)/../stack.h $(srcdir)/../output.h \ $(srcdir)/../except.h $(srcdir)/../system.h $(srcdir)/../toplev.h diff --git a/gcc/cp/lex.c b/gcc/cp/lex.c index 57639ad..5ef7f1d 100644 --- a/gcc/cp/lex.c +++ b/gcc/cp/lex.c @@ -39,15 +39,13 @@ Boston, MA 02111-1307, USA. */ #include "toplev.h" #include "output.h" -/* MULTIBYTE_CHARS support only works for native compilers. - ??? Ideally what we want is to model widechar support after - the current floating point support. */ -#ifdef CROSS_COMPILE -#undef MULTIBYTE_CHARS -#endif - #ifdef MULTIBYTE_CHARS +#include "mbchar.h" #include <locale.h> + +#ifndef GET_ENVIRONMENT +#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME)) +#endif #endif #define obstack_chunk_alloc xmalloc @@ -474,6 +472,12 @@ init_parse (filename) int i; +#ifdef MULTIBYTE_CHARS + /* Change to the native locale for multibyte conversions. */ + setlocale (LC_CTYPE, ""); + GET_ENVIRONMENT (literal_codeset, "LANG"); +#endif + #if USE_CPPLIB yy_cur = "\n"; yy_lim = yy_cur + 1; @@ -3922,30 +3926,27 @@ real_yylex () { register int result = 0; register int num_chars = 0; + int chars_seen = 0; unsigned width = TYPE_PRECISION (char_type_node); int max_chars; - - if (wide_flag) - { - width = WCHAR_TYPE_SIZE; #ifdef MULTIBYTE_CHARS - max_chars = MB_CUR_MAX; -#else - max_chars = 1; + int longest_char = local_mb_cur_max (); + (void) local_mbtowc (NULL_PTR, NULL_PTR, 0); #endif - } - else - max_chars = TYPE_PRECISION (integer_type_node) / width; + + max_chars = TYPE_PRECISION (integer_type_node) / width; + if (wide_flag) + width = WCHAR_TYPE_SIZE; while (1) { tryagain: - c = getch (); if (c == '\'' || c == EOF) break; + ++chars_seen; if (c == '\\') { int ignore = 0; @@ -3954,7 +3955,7 @@ real_yylex () goto tryagain; if (width < HOST_BITS_PER_INT && (unsigned) c >= (1 << width)) - warning ("escape sequence out of range for character"); + pedwarn ("escape sequence out of range for character"); #ifdef MAP_CHARACTER if (ISPRINT (c)) c = MAP_CHARACTER (c); @@ -3963,21 +3964,79 @@ real_yylex () else if (c == '\n') { if (pedantic) - pedwarn ("ANSI C++ forbids newline in character constant"); + pedwarn ("ANSI C forbids newline in character constant"); lineno++; } -#ifdef MAP_CHARACTER else - c = MAP_CHARACTER (c); + { +#ifdef MULTIBYTE_CHARS + wchar_t wc; + int i; + int char_len = -1; + for (i = 1; i <= longest_char; ++i) + { + if (i > maxtoken - 4) + extend_token_buffer (token_buffer); + + token_buffer[i] = c; + char_len = local_mbtowc (& wc, + token_buffer + 1, + i); + if (char_len != -1) + break; + c = getch (); + } + if (char_len > 1) + { + /* mbtowc sometimes needs an extra char before accepting */ + if (char_len < i) + put_back (c); + if (! wide_flag) + { + /* Merge character into result; ignore excess chars. */ + for (i = 1; i <= char_len; ++i) + { + if (i > max_chars) + break; + if (width < HOST_BITS_PER_INT) + result = (result << width) + | (token_buffer[i] + & ((1 << width) - 1)); + else + result = token_buffer[i]; + } + num_chars += char_len; + goto tryagain; + } + c = wc; + } + else + { + if (char_len == -1) + warning ("Ignoring invalid multibyte character"); + if (wide_flag) + c = wc; +#ifdef MAP_CHARACTER + else + c = MAP_CHARACTER (c); #endif + } +#else /* ! MULTIBYTE_CHARS */ +#ifdef MAP_CHARACTER + c = MAP_CHARACTER (c); +#endif +#endif /* ! MULTIBYTE_CHARS */ + } - num_chars++; - if (num_chars > maxtoken - 4) - extend_token_buffer (token_buffer); - - token_buffer[num_chars] = c; + if (wide_flag) + { + if (chars_seen == 1) /* only keep the first one */ + result = c; + goto tryagain; + } /* Merge character into result; ignore excess chars. */ + num_chars++; if (num_chars < max_chars + 1) { if (width < HOST_BITS_PER_INT) @@ -3987,19 +4046,16 @@ real_yylex () } } - token_buffer[num_chars + 1] = '\''; - token_buffer[num_chars + 2] = 0; - if (c != '\'') error ("malformatted character constant"); - else if (num_chars == 0) + else if (chars_seen == 0) error ("empty character constant"); else if (num_chars > max_chars) { num_chars = max_chars; error ("character constant too long"); } - else if (num_chars != 1 && warn_multichar) + else if (chars_seen != 1 && warn_multichar) warning ("multi-character character constant"); /* If char type is signed, sign-extend the constant. */ @@ -4012,37 +4068,21 @@ real_yylex () else if (TREE_UNSIGNED (char_type_node) || ((result >> (num_bits - 1)) & 1) == 0) yylval.ttype - = build_int_2 (result & ((unsigned HOST_WIDE_INT) ~0 + = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0 >> (HOST_BITS_PER_WIDE_INT - num_bits)), 0); else yylval.ttype - = build_int_2 (result | ~((unsigned HOST_WIDE_INT) ~0 + = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0 >> (HOST_BITS_PER_WIDE_INT - num_bits)), -1); - if (num_chars<=1) + if (chars_seen <= 1) TREE_TYPE (yylval.ttype) = char_type_node; else TREE_TYPE (yylval.ttype) = integer_type_node; } else { -#ifdef MULTIBYTE_CHARS - /* Set the initial shift state and convert the next sequence. */ - result = 0; - /* In all locales L'\0' is zero and mbtowc will return zero, - so don't use it. */ - if (num_chars > 1 - || (num_chars == 1 && token_buffer[1] != '\0')) - { - wchar_t wc; - (void) mbtowc (NULL, NULL, 0); - if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars) - result = wc; - else - warning ("Ignoring invalid multibyte character"); - } -#endif yylval.ttype = build_int_2 (result, 0); TREE_TYPE (yylval.ttype) = wchar_type_node; } @@ -4055,6 +4095,12 @@ real_yylex () string_constant: { register char *p; + unsigned width = wide_flag ? WCHAR_TYPE_SIZE + : TYPE_PRECISION (char_type_node); +#ifdef MULTIBYTE_CHARS + int longest_char = local_mb_cur_max (); + (void) local_mbtowc (NULL_PTR, NULL_PTR, 0); +#endif c = getch (); p = token_buffer + 1; @@ -4068,9 +4114,8 @@ real_yylex () c = readescape (&ignore); if (ignore) goto skipnewline; - if (!wide_flag - && TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT - && c >= ((unsigned) 1 << TYPE_PRECISION (char_type_node))) + if (width < HOST_BITS_PER_INT + && (unsigned) c >= (1 << width)) warning ("escape sequence out of range for character"); } else if (c == '\n') @@ -4079,10 +4124,74 @@ real_yylex () pedwarn ("ANSI C++ forbids newline in string constant"); lineno++; } + else + { +#ifdef MULTIBYTE_CHARS + wchar_t wc; + int i; + int char_len = -1; + for (i = 0; i < longest_char; ++i) + { + if (p + i == token_buffer + maxtoken) + p = extend_token_buffer (p); + p[i] = c; + + char_len = local_mbtowc (& wc, p, i + 1); + if (char_len != -1) + break; + c = getch (); + } + if (char_len == -1) + warning ("Ignoring invalid multibyte character"); + else + { + /* mbtowc sometimes needs an extra char before accepting */ + if (char_len <= i) + put_back (c); + if (wide_flag) + { + *(wchar_t *)p = wc; + p += sizeof (wc); + } + else + p += (i + 1); + c = getch (); + continue; + } +#endif /* MULTIBYTE_CHARS */ + } - if (p == token_buffer + maxtoken) - p = extend_token_buffer (p); - *p++ = c; + /* Add this single character into the buffer either as a wchar_t + or as a single byte. */ + if (wide_flag) + { + unsigned width = TYPE_PRECISION (char_type_node); + unsigned bytemask = (1 << width) - 1; + int byte; + + if (p + WCHAR_BYTES >= token_buffer + maxtoken) + p = extend_token_buffer (p); + + for (byte = 0; byte < WCHAR_BYTES; ++byte) + { + int value; + if (byte >= sizeof (c)) + value = 0; + else + value = (c >> (byte * width)) & bytemask; + if (BYTES_BIG_ENDIAN) + p[WCHAR_BYTES - byte - 1] = value; + else + p[byte] = value; + } + p += WCHAR_BYTES; + } + else + { + if (p == token_buffer + maxtoken) + p = extend_token_buffer (p); + *p++ = c; + } skipnewline: c = getch (); @@ -4091,56 +4200,36 @@ real_yylex () break; } } - *p = 0; - - /* We have read the entire constant. - Construct a STRING_CST for the result. */ + /* Terminate the string value, either with a single byte zero + or with a wide zero. */ if (wide_flag) { - /* If this is a L"..." wide-string, convert the multibyte string - to a wide character string. */ - char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES); - int len; - -#ifdef MULTIBYTE_CHARS - len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer); - if (len < 0 || len >= (p - token_buffer)) - { - warning ("Ignoring invalid multibyte string"); - len = 0; - } - bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES); -#else - { - char *wp, *cp; - - wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0); - bzero (widep, (p - token_buffer) * WCHAR_BYTES); - for (cp = token_buffer + 1; cp < p; cp++) - *wp = *cp, wp += WCHAR_BYTES; - len = p - token_buffer - 1; - } -#endif - if (processing_template_decl) - push_obstacks (&permanent_obstack, &permanent_obstack); - yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep); - if (processing_template_decl) - pop_obstacks (); - TREE_TYPE (yylval.ttype) = wchar_array_type_node; + if (p + WCHAR_BYTES >= token_buffer + maxtoken) + p = extend_token_buffer (p); + bzero (p, WCHAR_BYTES); + p += WCHAR_BYTES; } else { - if (processing_template_decl) - push_obstacks (&permanent_obstack, &permanent_obstack); - yylval.ttype = build_string (p - token_buffer, token_buffer + 1); - if (processing_template_decl) - pop_obstacks (); - TREE_TYPE (yylval.ttype) = char_array_type_node; + if (p == token_buffer + maxtoken) + p = extend_token_buffer (p); + *p++ = 0; } - *p++ = '"'; - *p = 0; + /* We have read the entire constant. + Construct a STRING_CST for the result. */ + + if (processing_template_decl) + push_obstacks (&permanent_obstack, &permanent_obstack); + yylval.ttype = build_string (p - (token_buffer + 1), token_buffer + 1); + if (processing_template_decl) + pop_obstacks (); + + if (wide_flag) + TREE_TYPE (yylval.ttype) = wchar_array_type_node; + else + TREE_TYPE (yylval.ttype) = char_array_type_node; value = STRING; break; } |