aboutsummaryrefslogtreecommitdiff
path: root/gcc/cexp.y
diff options
context:
space:
mode:
author: Dave Brolley <brolley@cygnus.com>, 1998-07-20 13:35:38 +0000
committer: Dave Brolley <brolley@gcc.gnu.org>, 1998-07-20 09:35:38 -0400
commit: 56f48ce9765aa2b6d4742a4923fee581a12c1418 (patch)
tree: 671dda9b98d314335b4825d12e41d06427ebd8ce /gcc/cexp.y
parent: 689fcba8611f93ce241fa090a0423e8d35324027 (diff)
downloadgcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.zip
gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.tar.gz
gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.tar.bz2
configure.in (enable_c_mbchar): New configure option.
Mon Jul 20 16:16:38 1998  Dave Brolley  <brolley@cygnus.com>

    * configure.in (enable_c_mbchar): New configure option.
    (extra_cpp_objs): Always available now.
    * cexp.y (mbchar.h): #include it.
    (yylex): Handle Multibyte characters in character literals.
    * cccp.c (mbchar.h): #include it.
    (main): Set character set based on LANG environment variable.
    (rescan): Handle multibyte characters in comments.
    (skip_if_group): See above.
    (validate_else): See above.
    (skip_to_end_of_comment): See above.
    (macarg1): See above.
    (discard_comments): See above.
    (rescan): Handle multibyte characters in string and character literals.
    (collect_expansion): See above.
    (skip_quoted_string): See above.
    (macroexpand): See above.
    (macarg1): See above.
    (discard_comments): See above.
    (change_newlines): See above.
    * c-lex.c (mbchar.h): #include it.
    (GET_ENVIRONMENT): New macro.
    (init_lex): Set character set based on LANG environment variable.
    (yylex): Handle multibyte characters in character literals.
    (yylex): Handle multibyte characters in string literals.
    * Makefile.in (mbchar.o): New target.
    (cccp$(exeext)): @extra_cpp_objs@ is always available.
    (cppmain$(exeext)): @extra_cpp_objs@ is always available.
    * mbchar.[ch]: New files for multibyte character handling.

From-SVN: r21303
Diffstat (limited to 'gcc/cexp.y')
-rw-r--r-- gcc/cexp.y 113
1 files changed, 72 insertions, 41 deletions
diff --git a/gcc/cexp.y b/gcc/cexp.y
index 6280aed..5d36329 100644
--- a/gcc/cexp.y
+++ b/gcc/cexp.y
@@ -39,12 +39,12 @@ Boston, MA 02111-1307, USA.
#include "system.h"
#include <setjmp.h>
/* #define YYDEBUG 1 */
+#include "gansidecl.h"
#ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
#include <locale.h>
-#endif
-
-#include "gansidecl.h"
+#endif /* MULTIBYTE_CHARS */
typedef unsigned char U_CHAR;
@@ -641,23 +641,18 @@ yylex ()
{
register HOST_WIDE_INT result = 0;
register int num_chars = 0;
+ int chars_seen = 0;
unsigned width = MAX_CHAR_TYPE_SIZE;
int max_chars;
- char *token_buffer;
-
- if (wide_flag)
- {
- width = MAX_WCHAR_TYPE_SIZE;
#ifdef MULTIBYTE_CHARS
- max_chars = MB_CUR_MAX;
-#else
- max_chars = 1;
+ int longest_char = local_mb_cur_max ();
+ char *token_buffer = (char *) alloca (longest_char);
+ (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
#endif
- }
- else
- max_chars = MAX_LONG_TYPE_SIZE / width;
- token_buffer = (char *) alloca (max_chars + 1);
+ max_chars = MAX_LONG_TYPE_SIZE / width;
+ if (wide_flag)
+ width = MAX_WCHAR_TYPE_SIZE;
while (1)
{
@@ -666,44 +661,96 @@ yylex ()
if (c == '\'' || c == EOF)
break;
+ ++chars_seen;
if (c == '\\')
{
c = parse_escape (&lexptr, mask);
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ wchar_t wc;
+ int i;
+ int char_len = -1;
+ for (i = 1; i <= longest_char; ++i)
+ {
+ token_buffer[i - 1] = c;
+ char_len = local_mbtowc (& wc, token_buffer, i);
+ if (char_len != -1)
+ break;
+ c = *lexptr++;
+ }
+ if (char_len > 1)
+ {
+ /* mbtowc sometimes needs an extra char before accepting */
+ if (char_len < i)
+ lexptr--;
+ if (! wide_flag)
+ {
+ /* Merge character into result; ignore excess chars. */
+ for (i = 1; i <= char_len; ++i)
+ {
+ if (i > max_chars)
+ break;
+ if (width < HOST_BITS_PER_INT)
+ result = (result << width)
+ | (token_buffer[i - 1]
+ & ((1 << width) - 1));
+ else
+ result = token_buffer[i - 1];
+ }
+ num_chars += char_len;
+ continue;
+ }
+ }
+ else
+ {
+ if (char_len == -1)
+ warning ("Ignoring invalid multibyte character");
+ }
+ if (wide_flag)
+ c = wc;
+#endif /* ! MULTIBYTE_CHARS */
+ }
- num_chars++;
+ if (wide_flag)
+ {
+ if (chars_seen == 1) /* only keep the first one */
+ result = c;
+ continue;
+ }
/* Merge character into result; ignore excess chars. */
+ num_chars++;
if (num_chars <= max_chars)
{
- if (width < HOST_BITS_PER_WIDE_INT)
- result = (result << width) | c;
+ if (width < HOST_BITS_PER_INT)
+ result = (result << width) | (c & ((1 << width) - 1));
else
result = c;
- token_buffer[num_chars - 1] = c;
}
}
- token_buffer[num_chars] = 0;
-
if (c != '\'')
error ("malformatted character constant");
- else if (num_chars == 0)
+ else if (chars_seen == 0)
error ("empty character constant");
else if (num_chars > max_chars)
{
num_chars = max_chars;
error ("character constant too long");
}
- else if (num_chars != 1 && ! traditional)
+ else if (chars_seen != 1 && ! traditional)
warning ("multi-character character constant");
/* If char type is signed, sign-extend the constant. */
if (! wide_flag)
{
int num_bits = num_chars * width;
-
- if (lookup ((U_CHAR *) "__CHAR_UNSIGNED__",
+ if (num_bits == 0)
+ /* We already got an error; avoid invalid shift. */
+ yylval.integer.value = 0;
+ else if (lookup ((U_CHAR *) "__CHAR_UNSIGNED__",
sizeof ("__CHAR_UNSIGNED__") - 1, -1)
|| ((result >> (num_bits - 1)) & 1) == 0)
yylval.integer.value
@@ -716,22 +763,6 @@ yylex ()
}
else
{
-#ifdef MULTIBYTE_CHARS
- /* Set the initial shift state and convert the next sequence. */
- result = 0;
- /* In all locales L'\0' is zero and mbtowc will return zero,
- so don't use it. */
- if (num_chars > 1
- || (num_chars == 1 && token_buffer[0] != '\0'))
- {
- wchar_t wc;
- (void) mbtowc (NULL_PTR, NULL_PTR, 0);
- if (mbtowc (& wc, token_buffer, num_chars) == num_chars)
- result = wc;
- else
- pedwarn ("Ignoring invalid multibyte character");
- }
-#endif
yylval.integer.value = result;
}
}