aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Brolley <brolley@cygnus.com>1998-07-20 13:35:38 +0000
committerDave Brolley <brolley@gcc.gnu.org>1998-07-20 09:35:38 -0400
commit56f48ce9765aa2b6d4742a4923fee581a12c1418 (patch)
tree671dda9b98d314335b4825d12e41d06427ebd8ce
parent689fcba8611f93ce241fa090a0423e8d35324027 (diff)
downloadgcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.zip
gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.tar.gz
gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.tar.bz2
configure.in (enable_c_mbchar): New configure option.
Mon Jul 20 16:16:38 1998 Dave Brolley <brolley@cygnus.com> * configure.in (enable_c_mbchar): New configure option. (extra_cpp_objs): Always available now. * cexp.y (mbchar.h): #include it. (yylex): Handle Multibyte characters in character literals. * cccp.c (mbchar.h): #include it. (main): Set character set based on LANG environment variable. (rescan): Handle multibyte characters in comments. (skip_if_group): See above. (validate_else): See above. (skip_to_end_of_comment): See above. (macarg1): See above. (discard_comments): See above. (rescan): Handle multibyte characters in string and character literals. (collect_expansion): See above. (skip_quoted_string): See above. (macroexpand): See above. (macarg1): See above. (discard_comments): See above. (change_newlines): See above. * c-lex.c (mbchar.h): #include it. (GET_ENVIRONMENT): New macro. (init_lex): Set character set based on LANG environment variable. (yylex): Handle multibyte characters in character literals. (yylex): Handle multibyte characters in string literals. * Makefile.in (mbchar.o): New target. (cccp$(exeext)): @extra_cpp_objs@ is always available. (cppmain$(exeext)): @extra_cpp_objs@ is always available. * mbchar.[ch]: New files for multibyte character handling. From-SVN: r21303
-rw-r--r--gcc/ChangeLog36
-rw-r--r--gcc/Makefile.in18
-rw-r--r--gcc/c-lex.c264
-rw-r--r--gcc/cccp.c359
-rw-r--r--gcc/cexp.y113
-rw-r--r--gcc/configure.in13
-rw-r--r--gcc/invoke.texi27
-rw-r--r--gcc/mbchar.c288
-rw-r--r--gcc/mbchar.h25
9 files changed, 987 insertions, 156 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b2867b1..b5f465a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,39 @@
+Mon Jul 20 16:16:38 1998 Dave Brolley <brolley@cygnus.com>
+
+ * configure.in (enable_c_mbchar): New configure option.
+ (extra_cpp_objs): Always available now.
+
+ * cexp.y (mbchar.h): #include it.
+ (yylex): Handle Multibyte characters in character literals.
+
+ * cccp.c (mbchar.h): #include it.
+ (main): Set character set based on LANG environment variable.
+ (rescan): Handle multibyte characters in comments.
+ (skip_if_group): See above.
+ (validate_else): See above.
+ (skip_to_end_of_comment): See above.
+ (macarg1): See above.
+ (discard_comments): See above.
+ (rescan): Handle multibyte characters in string and character literals.
+ (collect_expansion): See above.
+ (skip_quoted_string): See above.
+ (macroexpand): See above.
+ (macarg1): See above.
+ (discard_comments): See above.
+ (change_newlines): See above.
+
+ * c-lex.c (mbchar.h): #include it.
+ (GET_ENVIRONMENT): New macro.
+ (init_lex): Set character set based on LANG environment variable.
+ (yylex): Handle multibyte characters in character literals.
+ (yylex): Handle multibyte characters in string literals.
+
+ * Makefile.in (mbchar.o): New target.
+ (cccp$(exeext)): @extra_cpp_objs@ is always available.
+ (cppmain$(exeext)): @extra_cpp_objs@ is always available.
+
+ * mbchar.[ch]: New files for multibyte character handling.
+
Mon Jul 20 01:11:11 1998 David S. Miller <davem@pierdol.cobaltmicro.com>
* jump.c (jump_optimize): When simplifying noop moves and
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 39784e1..ea0a0b5 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -641,7 +641,8 @@ OBJS = toplev.o version.o tree.o print-tree.o stor-layout.o fold-const.o \
regclass.o local-alloc.o global.o reload.o reload1.o caller-save.o gcse.o \
insn-peep.o reorg.o $(SCHED_PREFIX)sched.o final.o recog.o reg-stack.o \
insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o \
- profile.o insn-attrtab.o $(out_object_file) getpwd.o $(EXTRA_OBJS) convert.o
+ profile.o insn-attrtab.o $(out_object_file) getpwd.o $(EXTRA_OBJS) convert.o \
+ mbchar.o
# GEN files are listed separately, so they can be built before doing parallel
# makes for cc1 or cc1plus. Otherwise sequent parallel make attempts to load
@@ -1275,13 +1276,14 @@ c-lang.o : c-lang.c $(CONFIG_H) system.h $(TREE_H) c-tree.h c-lex.h toplev.h \
output.h
c-lex.o : c-lex.c $(CONFIG_H) system.h $(TREE_H) $(RTL_H) c-lex.h c-tree.h \
$(srcdir)/c-parse.h input.h flags.h $(srcdir)/c-gperf.h c-pragma.h \
- toplev.h output.h
+ toplev.h output.h mbchar.h
c-aux-info.o : c-aux-info.c $(CONFIG_H) system.h $(TREE_H) c-tree.h flags.h
c-convert.o : c-convert.c $(CONFIG_H) system.h $(TREE_H) flags.h toplev.h
c-pragma.o: c-pragma.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) except.h \
function.h defaults.h c-pragma.h toplev.h
c-iterate.o: c-iterate.c $(CONFIG_H) system.h $(TREE_H) $(RTL_H) c-tree.h \
flags.h toplev.h $(EXPR_H)
+mbchar.o: $(CONFIG_H) system.h gansidecl.h mbchar.h
collect2$(exeext): collect2.o tlink.o hash.o cplus-dem.o underscore.o \
version.o choose-temp.o mkstemp.o $(LIBDEPS)
@@ -1816,15 +1818,16 @@ $(HOST_PREFIX_1):
cpp$(exeext): $(CCCP)$(exeext)
-rm -f cpp$(exeext)
$(LN) $(CCCP)$(exeext) cpp$(exeext)
-cccp$(exeext): cccp.o cexp.o version.o prefix.o $(LIBDEPS)
- $(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cccp.o cexp.o prefix.o \
- version.o $(LIBS)
+cccp$(exeext): cccp.o cexp.o version.o prefix.o mbchar.o @extra_cpp_objs@ $(LIBDEPS)
+ $(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cccp.o cexp.o prefix.o mbchar.o \
+ version.o @extra_cpp_objs@ $(LIBS)
cexp.o: $(srcdir)/cexp.c $(CONFIG_H) system.h gansidecl.h
$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) -c $(srcdir)/cexp.c
$(srcdir)/cexp.c: $(srcdir)/cexp.y
cd $(srcdir); $(BISON) -o cexp.c cexp.y
-cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h
+cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h \
+ mbchar.h
$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
-DGCC_INCLUDE_DIR=\"$(libsubdir)/include\" \
-DGPLUSPLUS_INCLUDE_DIR=\"$(gxx_include_dir)\" \
@@ -1835,8 +1838,9 @@ cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h
-c `echo $(srcdir)/cccp.c | sed 's,^\./,,'`
cppmain$(exeext): cppmain.o cpplib.o cpphash.o cppalloc.o cpperror.o cppexp.o \
- prefix.o version.o $(LIBDEPS)
+ prefix.o version.o mbchar.o @extra_cpp_objs@ $(LIBDEPS)
$(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cppmain.o cpplib.o cpphash.o \
+ mbchar.o @extra_cpp_objs@ \
cppalloc.o cpperror.o cppexp.o prefix.o version.o $(LIBS)
cppmain.o: cppmain.c $(CONFIG_H) cpplib.h system.h gansidecl.h
diff --git a/gcc/c-lex.c b/gcc/c-lex.c
index f82ad76..f4f4a12 100644
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -33,16 +33,14 @@ Boston, MA 02111-1307, USA. */
#include "c-pragma.h"
#include "toplev.h"
-/* MULTIBYTE_CHARS support only works for native compilers.
- ??? Ideally what we want is to model widechar support after
- the current floating point support. */
-#ifdef CROSS_COMPILE
-#undef MULTIBYTE_CHARS
-#endif
-
#ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
#include <locale.h>
+
+#ifndef GET_ENVIRONMENT
+#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
#endif
+#endif /* MULTIBYTE_CHARS */
#if USE_CPPLIB
#include "cpplib.h"
@@ -232,6 +230,7 @@ init_lex ()
#ifdef MULTIBYTE_CHARS
/* Change to the native locale for multibyte conversions. */
setlocale (LC_CTYPE, "");
+ GET_ENVIRONMENT (literal_codeset, "LANG");
#endif
maxtoken = 40;
@@ -1795,30 +1794,27 @@ yylex ()
{
register int result = 0;
register int num_chars = 0;
+ int chars_seen = 0;
unsigned width = TYPE_PRECISION (char_type_node);
int max_chars;
-
- if (wide_flag)
- {
- width = WCHAR_TYPE_SIZE;
#ifdef MULTIBYTE_CHARS
- max_chars = MB_CUR_MAX;
-#else
- max_chars = 1;
+ int longest_char = local_mb_cur_max ();
+ (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
#endif
- }
- else
- max_chars = TYPE_PRECISION (integer_type_node) / width;
+
+ max_chars = TYPE_PRECISION (integer_type_node) / width;
+ if (wide_flag)
+ width = WCHAR_TYPE_SIZE;
while (1)
{
tryagain:
-
c = GETC();
if (c == '\'' || c == EOF)
break;
+ ++chars_seen;
if (c == '\\')
{
int ignore = 0;
@@ -1839,18 +1835,76 @@ yylex ()
pedwarn ("ANSI C forbids newline in character constant");
lineno++;
}
-#ifdef MAP_CHARACTER
else
- c = MAP_CHARACTER (c);
+ {
+#ifdef MULTIBYTE_CHARS
+ wchar_t wc;
+ int i;
+ int char_len = -1;
+ for (i = 1; i <= longest_char; ++i)
+ {
+ if (i > maxtoken - 4)
+ extend_token_buffer (token_buffer);
+
+ token_buffer[i] = c;
+ char_len = local_mbtowc (& wc,
+ token_buffer + 1,
+ i);
+ if (char_len != -1)
+ break;
+ c = GETC ();
+ }
+ if (char_len > 1)
+ {
+ /* mbtowc sometimes needs an extra char before accepting */
+ if (char_len < i)
+ UNGETC (c);
+ if (! wide_flag)
+ {
+ /* Merge character into result; ignore excess chars. */
+ for (i = 1; i <= char_len; ++i)
+ {
+ if (i > max_chars)
+ break;
+ if (width < HOST_BITS_PER_INT)
+ result = (result << width)
+ | (token_buffer[i]
+ & ((1 << width) - 1));
+ else
+ result = token_buffer[i];
+ }
+ num_chars += char_len;
+ goto tryagain;
+ }
+ c = wc;
+ }
+ else
+ {
+ if (char_len == -1)
+ warning ("Ignoring invalid multibyte character");
+ if (wide_flag)
+ c = wc;
+#ifdef MAP_CHARACTER
+ else
+ c = MAP_CHARACTER (c);
#endif
+ }
+#else /* ! MULTIBYTE_CHARS */
+#ifdef MAP_CHARACTER
+ c = MAP_CHARACTER (c);
+#endif
+#endif /* ! MULTIBYTE_CHARS */
+ }
- num_chars++;
- if (num_chars > maxtoken - 4)
- extend_token_buffer (token_buffer);
-
- token_buffer[num_chars] = c;
+ if (wide_flag)
+ {
+ if (chars_seen == 1) /* only keep the first one */
+ result = c;
+ goto tryagain;
+ }
/* Merge character into result; ignore excess chars. */
+ num_chars += (width / TYPE_PRECISION (char_type_node));
if (num_chars < max_chars + 1)
{
if (width < HOST_BITS_PER_INT)
@@ -1860,19 +1914,16 @@ yylex ()
}
}
- token_buffer[num_chars + 1] = '\'';
- token_buffer[num_chars + 2] = 0;
-
if (c != '\'')
error ("malformatted character constant");
- else if (num_chars == 0)
+ else if (chars_seen == 0)
error ("empty character constant");
else if (num_chars > max_chars)
{
num_chars = max_chars;
error ("character constant too long");
}
- else if (num_chars != 1 && ! flag_traditional && warn_multichar)
+ else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
warning ("multi-character character constant");
/* If char type is signed, sign-extend the constant. */
@@ -1897,22 +1948,6 @@ yylex ()
}
else
{
-#ifdef MULTIBYTE_CHARS
- /* Set the initial shift state and convert the next sequence. */
- result = 0;
- /* In all locales L'\0' is zero and mbtowc will return zero,
- so don't use it. */
- if (num_chars > 1
- || (num_chars == 1 && token_buffer[1] != '\0'))
- {
- wchar_t wc;
- (void) mbtowc (NULL_PTR, NULL_PTR, 0);
- if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
- result = wc;
- else
- warning ("Ignoring invalid multibyte character");
- }
-#endif
yylval.ttype = build_int_2 (result, 0);
TREE_TYPE (yylval.ttype) = wchar_type_node;
}
@@ -1924,7 +1959,13 @@ yylex ()
case '"':
string_constant:
{
- c = GETC();
+ unsigned width = wide_flag ? WCHAR_TYPE_SIZE
+ : TYPE_PRECISION (char_type_node);
+#ifdef MULTIBYTE_CHARS
+ int longest_char = local_mb_cur_max ();
+ (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
+#endif
+ c = GETC ();
p = token_buffer + 1;
while (c != '"' && c >= 0)
@@ -1935,9 +1976,8 @@ yylex ()
c = readescape (&ignore);
if (ignore)
goto skipnewline;
- if (!wide_flag
- && TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT
- && c >= (1 << TYPE_PRECISION (char_type_node)))
+ if (width < HOST_BITS_PER_INT
+ && (unsigned) c >= (1 << width))
pedwarn ("escape sequence out of range for character");
}
else if (c == '\n')
@@ -1946,15 +1986,94 @@ yylex ()
pedwarn ("ANSI C forbids newline in string constant");
lineno++;
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ wchar_t wc;
+ int i;
+ int char_len = -1;
+ for (i = 0; i < longest_char; ++i)
+ {
+ if (p + i == token_buffer + maxtoken)
+ p = extend_token_buffer (p);
+ p[i] = c;
- if (p == token_buffer + maxtoken)
- p = extend_token_buffer (p);
- *p++ = c;
+ char_len = local_mbtowc (& wc, p, i + 1);
+ if (char_len != -1)
+ break;
+ c = GETC ();
+ }
+ if (char_len == -1)
+ warning ("Ignoring invalid multibyte character");
+ else
+ {
+ /* mbtowc sometimes needs an extra char before accepting */
+ if (char_len <= i)
+ UNGETC (c);
+ if (wide_flag)
+ {
+ *(wchar_t *)p = wc;
+ p += sizeof (wc);
+ }
+ else
+ p += (i + 1);
+ c = GETC ();
+ continue;
+ }
+#endif /* MULTIBYTE_CHARS */
+ }
+
+ /* Add this single character into the buffer either as a wchar_t
+ or as a single byte. */
+ if (wide_flag)
+ {
+ unsigned width = TYPE_PRECISION (char_type_node);
+ unsigned bytemask = (1 << width) - 1;
+ int byte;
+
+ if (p + WCHAR_BYTES >= token_buffer + maxtoken)
+ p = extend_token_buffer (p);
+
+ for (byte = 0; byte < WCHAR_BYTES; ++byte)
+ {
+ int value;
+ if (byte >= sizeof (c))
+ value = 0;
+ else
+ value = (c >> (byte * width)) & bytemask;
+ if (BYTES_BIG_ENDIAN)
+ p[WCHAR_BYTES - byte - 1] = value;
+ else
+ p[byte] = value;
+ }
+ p += WCHAR_BYTES;
+ }
+ else
+ {
+ if (p == token_buffer + maxtoken)
+ p = extend_token_buffer (p);
+ *p++ = c;
+ }
skipnewline:
- c = GETC();
+ c = GETC ();
+ }
+
+ /* Terminate the string value, either with a single byte zero
+ or with a wide zero. */
+ if (wide_flag)
+ {
+ if (p + WCHAR_BYTES >= token_buffer + maxtoken)
+ p = extend_token_buffer (p);
+ bzero (p, WCHAR_BYTES);
+ p += WCHAR_BYTES;
+ }
+ else
+ {
+ if (p == token_buffer + maxtoken)
+ p = extend_token_buffer (p);
+ *p++ = 0;
}
- *p = 0;
if (c < 0)
error ("Unterminated string constant");
@@ -1964,52 +2083,27 @@ yylex ()
if (wide_flag)
{
- /* If this is a L"..." wide-string, convert the multibyte string
- to a wide character string. */
- char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES);
- int len;
-
-#ifdef MULTIBYTE_CHARS
- len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
- if (len < 0 || len >= (p - token_buffer))
- {
- warning ("Ignoring invalid multibyte string");
- len = 0;
- }
- bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
-#else
- {
- char *wp, *cp;
-
- wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0);
- bzero (widep, (p - token_buffer) * WCHAR_BYTES);
- for (cp = token_buffer + 1; cp < p; cp++)
- *wp = *cp, wp += WCHAR_BYTES;
- len = p - token_buffer - 1;
- }
-#endif
- yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
+ yylval.ttype = build_string (p - (token_buffer + 1),
+ token_buffer + 1);
TREE_TYPE (yylval.ttype) = wchar_array_type_node;
value = STRING;
}
else if (objc_flag)
{
/* Return an Objective-C @"..." constant string object. */
- yylval.ttype = build_objc_string (p - token_buffer,
+ yylval.ttype = build_objc_string (p - (token_buffer + 1),
token_buffer + 1);
TREE_TYPE (yylval.ttype) = char_array_type_node;
value = OBJC_STRING;
}
else
{
- yylval.ttype = build_string (p - token_buffer, token_buffer + 1);
+ yylval.ttype = build_string (p - (token_buffer + 1),
+ token_buffer + 1);
TREE_TYPE (yylval.ttype) = char_array_type_node;
value = STRING;
}
- *p++ = '"';
- *p = 0;
-
break;
}
diff --git a/gcc/cccp.c b/gcc/cccp.c
index 1bd7649..55b6e68 100644
--- a/gcc/cccp.c
+++ b/gcc/cccp.c
@@ -45,6 +45,11 @@ typedef unsigned char U_CHAR;
#include "gansidecl.h"
#include "pcp.h"
+#ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
+#include <locale.h>
+#endif /* MULTIBYTE_CHARS */
+
#ifndef GET_ENVIRONMENT
#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ENV_VALUE = getenv (ENV_NAME)
#endif
@@ -1308,6 +1313,12 @@ main (argc, argv)
bzero ((char *) pend_assertions, argc * sizeof (char *));
bzero ((char *) pend_includes, argc * sizeof (char *));
+#ifdef MULTIBYTE_CHARS
+ /* Change to the native locale for multibyte conversions. */
+ setlocale (LC_CTYPE, "");
+ GET_ENVIRONMENT (literal_codeset, "LANG");
+#endif
+
/* Process switches and find input file name. */
for (i = 1; i < argc; i++) {
@@ -2774,9 +2785,27 @@ do { ip = &instack[indepth]; \
bp += 2;
else if (*bp == '/' && bp[1] == '*') {
bp += 2;
- while (!(*bp == '*' && bp[1] == '/'))
- bp++;
- bp += 2;
+ while (1)
+ {
+ if (*bp == '*')
+ {
+ if (bp[1] == '/')
+ {
+ bp += 2;
+ break;
+ }
+ }
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (bp, limit - bp);
+ if (length > 1)
+ bp += (length - 1);
+#endif
+ }
+ bp++;
+ }
}
/* There is no point in trying to deal with C++ // comments here,
because if there is one, then this # must be part of the
@@ -2937,6 +2966,24 @@ do { ip = &instack[indepth]; \
if (ibp[-1] == c)
goto while2end;
break;
+#ifdef MULTIBYTE_CHARS
+ default:
+ {
+ int length;
+ --ibp;
+ length = local_mblen (ibp, limit - ibp);
+ if (length > 0)
+ {
+ --obp;
+ bcopy (ibp, obp, length);
+ obp += length;
+ ibp += length;
+ }
+ else
+ ++ibp;
+ }
+ break;
+#endif
}
}
while2end:
@@ -2983,6 +3030,15 @@ do { ip = &instack[indepth]; \
*obp++ = '\n';
++op->lineno;
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (ibp, limit - ibp);
+ if (length > 1)
+ ibp += (length - 1);
+#endif
+ }
}
break;
}
@@ -3071,6 +3127,16 @@ do { ip = &instack[indepth]; \
goto limit_reached;
}
break;
+#ifdef MULTIBYTE_CHARS
+ default:
+ {
+ int length;
+ length = local_mblen (ibp, limit - ibp);
+ if (length > 1)
+ ibp += (length - 1);
+ }
+ break;
+#endif
}
}
comment_end:
@@ -3433,11 +3499,27 @@ randomchar:
break;
}
}
- if (*ibp == '\n') {
+ else if (*ibp == '\n') {
/* Newline in a file. Count it. */
++ip->lineno;
++op->lineno;
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (ibp, limit - ibp);
+ if (length > 1)
+ {
+ if (put_out_comments)
+ {
+ bcopy (ibp, obp, length - 1);
+ obp += length - 1;
+ }
+ ibp += (length - 1);
+ }
+#endif
+ }
if (put_out_comments)
*obp++ = *ibp;
}
@@ -3448,9 +3530,32 @@ randomchar:
} else if (! traditional) {
*obp++ = ' ';
}
- for (ibp += 2; *ibp != '\n' || ibp[-1] == '\\'; ibp++)
- if (put_out_comments)
- *obp++ = *ibp;
+ for (ibp += 2; ; ibp++)
+ {
+ if (*ibp == '\n')
+ {
+ if (ibp[-1] != '\\')
+ break;
+ }
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (ibp, limit - ibp);
+ if (length > 1)
+ {
+ if (put_out_comments)
+ {
+ bcopy (ibp, obp, length - 1);
+ obp += length - 1;
+ }
+ ibp += (length - 1);
+ }
+#endif
+ }
+ if (put_out_comments)
+ *obp++ = *ibp;
+ }
} else
break;
}
@@ -6186,6 +6291,25 @@ collect_expansion (buf, end, nargs, arglist)
}
}
+#ifdef MULTIBYTE_CHARS
+ /* Handle multibyte characters inside string and character literals. */
+ if (expected_delimiter != '\0')
+ {
+ int length;
+ --p;
+ length = local_mblen (p, limit - p);
+ if (length > 1)
+ {
+ --exp_p;
+ bcopy (p, exp_p, length);
+ p += length;
+ exp_p += length;
+ continue;
+ }
+ ++p;
+ }
+#endif
+
/* Handle the start of a symbol. */
if (is_idchar[c] && nargs > 0) {
U_CHAR *id_beg = p - 1;
@@ -7412,9 +7536,27 @@ skip_if_group (ip, any, op)
bp += 2;
else if (*bp == '/' && bp[1] == '*') {
bp += 2;
- while (!(*bp == '*' && bp[1] == '/'))
- bp++;
- bp += 2;
+ while (1)
+ {
+ if (*bp == '*')
+ {
+ if (bp[1] == '/')
+ {
+ bp += 2;
+ break;
+ }
+ }
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (bp, endb - bp);
+ if (length > 1)
+ bp += (length - 1);
+#endif
+ }
+ bp++;
+ }
}
/* There is no point in trying to deal with C++ // comments here,
because if there is one, then this # must be part of the
@@ -7458,6 +7600,15 @@ skip_if_group (ip, any, op)
if (bp[1] == '/')
break;
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (bp, endb - bp);
+ if (length > 1)
+ bp += (length - 1);
+#endif
+ }
}
bp += 2;
} else if (bp[1] == '/' && cplusplus_comments) {
@@ -7469,6 +7620,15 @@ skip_if_group (ip, any, op)
warning ("multiline `//' comment");
ip->lineno++;
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (bp, endb - bp);
+ if (length > 1)
+ bp += (length - 1);
+#endif
+ }
}
} else
break;
@@ -7764,6 +7924,15 @@ validate_else (p, limit)
break;
}
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (p, limit - p);
+ if (length > 1)
+ p += (length - 1);
+#endif
+ }
}
}
else if (cplusplus_comments && p[1] == '/')
@@ -7817,6 +7986,22 @@ skip_to_end_of_comment (ip, line_counter, nowarn)
if (op)
++op->lineno;
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (bp, limit - bp);
+ if (length > 1)
+ {
+ if (op)
+ {
+ bcopy (bp, op->bufp, length - 1);
+ op->bufp += (length - 1);
+ }
+ bp += (length - 1);
+ }
+#endif
+ }
if (op)
*op->bufp++ = *bp;
}
@@ -7854,6 +8039,23 @@ skip_to_end_of_comment (ip, line_counter, nowarn)
return bp;
}
break;
+#ifdef MULTIBYTE_CHARS
+ default:
+ {
+ int length;
+ bp--;
+ length = local_mblen (bp, limit - bp);
+ if (length <= 0)
+ length = 1;
+ if (op)
+ {
+ op->bufp--;
+ bcopy (bp, op->bufp, length);
+ op->bufp += length;
+ }
+ bp += length;
+ }
+#endif
}
}
@@ -7944,6 +8146,16 @@ skip_quoted_string (bp, limit, start_line, count_newlines, backslash_newlines_p,
}
} else if (c == match)
break;
+#ifdef MULTIBYTE_CHARS
+ {
+ int length;
+ --bp;
+ length = local_mblen (bp, limit - bp);
+ if (length <= 0)
+ length = 1;
+ bp += length;
+ }
+#endif
}
return bp;
}
@@ -8381,9 +8593,23 @@ macroexpand (hp, op)
else {
if (c == '\\')
escaped = 1;
- if (in_string) {
+ else if (in_string) {
if (c == in_string)
in_string = 0;
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (arg->raw + i, arglen - i);
+ if (length > 1)
+ {
+ bcopy (arg->raw + i, xbuf + totlen, length);
+ i += length - 1;
+ totlen += length;
+ continue;
+ }
+#endif
+ }
} else if (c == '\"' || c == '\'')
in_string = c;
}
@@ -8717,6 +8943,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
break;
}
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (bp, limit - bp);
+ if (length > 1)
+ bp += (length - 1);
+#endif
+ }
}
} else if (bp[1] == '/' && cplusplus_comments) {
*comments = 1;
@@ -8728,6 +8963,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
if (warn_comments)
warning ("multiline `//' comment");
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (bp, limit - bp);
+ if (length > 1)
+ bp += (length - 1);
+#endif
+ }
}
}
break;
@@ -8751,6 +8995,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
if (quotec == '\'')
break;
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ length = local_mblen (bp, limit - bp);
+ if (length > 1)
+ bp += (length - 1);
+#endif
+ }
}
}
break;
@@ -8828,8 +9081,23 @@ discard_comments (start, length, newlines)
/* Comments are equivalent to spaces. */
obp[-1] = ' ';
ibp++;
- while (ibp < limit && (*ibp != '\n' || ibp[-1] == '\\'))
- ibp++;
+ while (ibp < limit)
+ {
+ if (*ibp == '\n')
+ {
+ if (ibp[-1] != '\\')
+ break;
+ }
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length = local_mblen (ibp, limit - ibp);
+ if (length > 1)
+ ibp += (length - 1);
+#endif
+ }
+ ibp++;
+ }
break;
}
if (ibp[0] != '*' || ibp + 1 >= limit)
@@ -8849,6 +9117,14 @@ discard_comments (start, length, newlines)
break;
}
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length = local_mblen (ibp, limit - ibp);
+ if (length > 1)
+ ibp += (length - 1);
+#endif
+ }
}
break;
@@ -8863,9 +9139,12 @@ discard_comments (start, length, newlines)
*obp++ = c = *ibp++;
if (c == quotec)
break;
- if (c == '\n' && quotec == '\'')
- break;
- if (c == '\\') {
+ if (c == '\n')
+ {
+ if (quotec == '\'')
+ break;
+ }
+ else if (c == '\\') {
if (ibp < limit && *ibp == '\n') {
ibp++;
obp--;
@@ -8876,6 +9155,23 @@ discard_comments (start, length, newlines)
*obp++ = *ibp++;
}
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ ibp--;
+ length = local_mblen (ibp, limit - ibp);
+ if (length > 1)
+ {
+ obp--;
+ bcopy (ibp, obp, length);
+ ibp += length;
+ obp += length;
+ }
+ else
+ ibp++;
+#endif
+ }
}
}
break;
@@ -8925,10 +9221,33 @@ change_newlines (start, length)
int quotec = c;
while (ibp < limit) {
*obp++ = c = *ibp++;
- if (c == quotec && ibp[-2] != '\\')
- break;
- if (c == '\n' && quotec == '\'')
- break;
+ if (c == quotec)
+ {
+ if (ibp[-2] != '\\')
+ break;
+ }
+ else if (c == '\n')
+ {
+ if (quotec == '\'')
+ break;
+ }
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ int length;
+ ibp--;
+ length = local_mblen (ibp, limit - ibp);
+ if (length > 1)
+ {
+ obp--;
+ bcopy (ibp, obp, length);
+ ibp += length;
+ obp += length;
+ }
+ else
+ ibp++;
+#endif
+ }
}
}
break;
diff --git a/gcc/cexp.y b/gcc/cexp.y
index 6280aed..5d36329 100644
--- a/gcc/cexp.y
+++ b/gcc/cexp.y
@@ -39,12 +39,12 @@ Boston, MA 02111-1307, USA.
#include "system.h"
#include <setjmp.h>
/* #define YYDEBUG 1 */
+#include "gansidecl.h"
#ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
#include <locale.h>
-#endif
-
-#include "gansidecl.h"
+#endif /* MULTIBYTE_CHARS */
typedef unsigned char U_CHAR;
@@ -641,23 +641,18 @@ yylex ()
{
register HOST_WIDE_INT result = 0;
register int num_chars = 0;
+ int chars_seen = 0;
unsigned width = MAX_CHAR_TYPE_SIZE;
int max_chars;
- char *token_buffer;
-
- if (wide_flag)
- {
- width = MAX_WCHAR_TYPE_SIZE;
#ifdef MULTIBYTE_CHARS
- max_chars = MB_CUR_MAX;
-#else
- max_chars = 1;
+ int longest_char = local_mb_cur_max ();
+ char *token_buffer = (char *) alloca (longest_char);
+ (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
#endif
- }
- else
- max_chars = MAX_LONG_TYPE_SIZE / width;
- token_buffer = (char *) alloca (max_chars + 1);
+ max_chars = MAX_LONG_TYPE_SIZE / width;
+ if (wide_flag)
+ width = MAX_WCHAR_TYPE_SIZE;
while (1)
{
@@ -666,44 +661,96 @@ yylex ()
if (c == '\'' || c == EOF)
break;
+ ++chars_seen;
if (c == '\\')
{
c = parse_escape (&lexptr, mask);
}
+ else
+ {
+#ifdef MULTIBYTE_CHARS
+ wchar_t wc;
+ int i;
+ int char_len = -1;
+ for (i = 1; i <= longest_char; ++i)
+ {
+ token_buffer[i - 1] = c;
+ char_len = local_mbtowc (& wc, token_buffer, i);
+ if (char_len != -1)
+ break;
+ c = *lexptr++;
+ }
+ if (char_len > 1)
+ {
+ /* mbtowc sometimes needs an extra char before accepting */
+ if (char_len < i)
+ lexptr--;
+ if (! wide_flag)
+ {
+ /* Merge character into result; ignore excess chars. */
+ for (i = 1; i <= char_len; ++i)
+ {
+ if (i > max_chars)
+ break;
+ if (width < HOST_BITS_PER_INT)
+ result = (result << width)
+ | (token_buffer[i - 1]
+ & ((1 << width) - 1));
+ else
+ result = token_buffer[i - 1];
+ }
+ num_chars += char_len;
+ continue;
+ }
+ }
+ else
+ {
+ if (char_len == -1)
+ warning ("Ignoring invalid multibyte character");
+ }
+ if (wide_flag)
+ c = wc;
+#endif /* ! MULTIBYTE_CHARS */
+ }
- num_chars++;
+ if (wide_flag)
+ {
+ if (chars_seen == 1) /* only keep the first one */
+ result = c;
+ continue;
+ }
/* Merge character into result; ignore excess chars. */
+ num_chars++;
if (num_chars <= max_chars)
{
- if (width < HOST_BITS_PER_WIDE_INT)
- result = (result << width) | c;
+ if (width < HOST_BITS_PER_INT)
+ result = (result << width) | (c & ((1 << width) - 1));
else
result = c;
- token_buffer[num_chars - 1] = c;
}
}
- token_buffer[num_chars] = 0;
-
if (c != '\'')
error ("malformatted character constant");
- else if (num_chars == 0)
+ else if (chars_seen == 0)
error ("empty character constant");
else if (num_chars > max_chars)
{
num_chars = max_chars;
error ("character constant too long");
}
- else if (num_chars != 1 && ! traditional)
+ else if (chars_seen != 1 && ! traditional)
warning ("multi-character character constant");
/* If char type is signed, sign-extend the constant. */
if (! wide_flag)
{
int num_bits = num_chars * width;
-
- if (lookup ((U_CHAR *) "__CHAR_UNSIGNED__",
+ if (num_bits == 0)
+ /* We already got an error; avoid invalid shift. */
+ yylval.integer.value = 0;
+ else if (lookup ((U_CHAR *) "__CHAR_UNSIGNED__",
sizeof ("__CHAR_UNSIGNED__") - 1, -1)
|| ((result >> (num_bits - 1)) & 1) == 0)
yylval.integer.value
@@ -716,22 +763,6 @@ yylex ()
}
else
{
-#ifdef MULTIBYTE_CHARS
- /* Set the initial shift state and convert the next sequence. */
- result = 0;
- /* In all locales L'\0' is zero and mbtowc will return zero,
- so don't use it. */
- if (num_chars > 1
- || (num_chars == 1 && token_buffer[0] != '\0'))
- {
- wchar_t wc;
- (void) mbtowc (NULL_PTR, NULL_PTR, 0);
- if (mbtowc (& wc, token_buffer, num_chars) == num_chars)
- result = wc;
- else
- pedwarn ("Ignoring invalid multibyte character");
- }
-#endif
yylval.integer.value = result;
}
}
diff --git a/gcc/configure.in b/gcc/configure.in
index 6791547..b4c1aca 100644
--- a/gcc/configure.in
+++ b/gcc/configure.in
@@ -84,7 +84,7 @@ AC_DEFINE(ENABLE_CHECKING)
# Enable use of cpplib for C.
cpp_main=cccp
AC_ARG_ENABLE(c-cpplib,
-[ --enable-c-cpplib Use cpplib for C.],
+[ --enable-c-cpplib Use cpplib for C and C++.],
if [[[ x$enable_c_cpplib != xno ]]]; then
extra_c_objs="${extra_c_objs} cpplib.o cppexp.o cpphash.o cpperror.o"
extra_c_objs="${extra_c_objs} prefix.o"
@@ -93,6 +93,13 @@ if [[[ x$enable_c_cpplib != xno ]]]; then
cpp_main=cppmain
fi)
+# Enable Multibyte Characters for C/C++
+AC_ARG_ENABLE(c-mbchar,
+[ --enable-c-mbchar Enable multibyte characters for C and C++.],
+if [[[ x$enable_c_mbchar != xno ]]]; then
+ extra_c_flags=-DMULTIBYTE_CHARS=1
+fi)
+
# Enable Haifa scheduler.
AC_ARG_ENABLE(haifa,
[ --enable-haifa Use the experimental scheduler.
@@ -193,6 +200,9 @@ AC_CHECK_FUNCS(strtoul bsearch strerror putenv popen bcopy bzero bcmp \
index rindex strchr strrchr kill getrlimit setrlimit atoll atoq \
sysconf isascii gettimeofday)
+# Make sure wchar_t is available
+#AC_CHECK_TYPE(wchar_t, unsigned int)
+
GCC_FUNC_VFPRINTF_DOPRNT
GCC_FUNC_PRINTF_PTR
@@ -3585,6 +3595,7 @@ AC_SUBST(extra_programs)
AC_SUBST(extra_parts)
AC_SUBST(extra_c_objs)
AC_SUBST(extra_cxx_objs)
+AC_SUBST(extra_cpp_objs)
AC_SUBST(extra_c_flags)
AC_SUBST(extra_objs)
AC_SUBST(host_extra_gcc_objs)
diff --git a/gcc/invoke.texi b/gcc/invoke.texi
index 8056b84..3b3ad43 100644
--- a/gcc/invoke.texi
+++ b/gcc/invoke.texi
@@ -5964,8 +5964,9 @@ the language standard. You should not need to use these options yourself.
@cindex environment variables
This section describes several environment variables that affect how GNU
-CC operates. They work by specifying directories or prefixes to use
-when searching for various kinds of files.
+CC operates. Some of them work by specifying directories or prefixes to use
+when searching for various kinds of files. Some are used to specify other
+ascpects of the compilation environment.
@ifclear INTERNALS
Note that you can also specify places to search using options such as
@@ -6065,6 +6066,28 @@ which case the Make rules are written to that file, guessing the target
name from the source file name. Or the value can have the form
@samp{@var{file} @var{target}}, in which case the rules are written to
file @var{file} using @var{target} as the target name.
+
+@item LANG
+@findex LANG
+@cindex locale definition
+This variable is used to pass locale information to the compiler. One way in
+which this information is used is to determine the character set to be used
+when character literals, string literals and comments are parsed in C and C++.
+When the compiler is configured to allow multibyte characters,
+the following values for @code{LANG} are recognized:
+
+@table @code
+@item C-JIS
+Recognize JIS characters.
+@item C-SJIS
+Recognize SJIS characters.
+@item C-EUCJP
+Recognize EUCJP characters.
+@end table
+
+If @code{LANG} is not defined, or if it has some ther value, then the
+compiler will use mblen and mbtowc as defined by the default locale to
+recognize and translate multibyte characters.
@end table
@node Running Protoize
diff --git a/gcc/mbchar.c b/gcc/mbchar.c
new file mode 100644
index 0000000..d54a497
--- /dev/null
+++ b/gcc/mbchar.c
@@ -0,0 +1,288 @@
+/* Multibyte Character Functions.
+ Copyright (C) 1998 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* These functions are used to manipulate multibyte characters. */
+
+/* Note regarding cross compilation:
+
+ In general translation of multibyte characters to wide characters can
+ only work in a native compiler since the translation function (mbtowc)
+ needs to know about both the source and target character encoding. However,
+ this particular implementation for JIS, SJIS and EUCJP source characters
+ will work for any compiler with a newlib target. Other targets may also
+ work provided that their wchar_t implementation is 2 bytes and the encoding
+ leaves the source character values unchanged (except for removing the
+ state shifting markers). */
+
+#ifdef MULTIBYTE_CHARS
+#include "config.h"
+#include "system.h"
+#include "gansidecl.h"
+#include "mbchar.h"
+#include <locale.h>
+
+typedef enum
+{
+ ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER, JIS_C_NUM
+} JIS_CHAR_TYPE;
+
+typedef enum
+{
+ ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
+ J2_ESC, J2_ESC_BR, INV, JIS_S_NUM
+} JIS_STATE;
+
+typedef enum
+{
+ COPYA, COPYJ, COPYJ2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR
+} JIS_ACTION;
+
+/*****************************************************************************
+ * state/action tables for processing JIS encoding
+ * Where possible, switches to JIS are grouped with proceding JIS characters
+ * and switches to ASCII are grouped with preceding JIS characters.
+ * Thus, maximum returned length is:
+ * 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
+ *****************************************************************************/
+static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
+/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
+/*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
+/*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
+/*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
+/*JIS*/ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1,INV },
+/*JIS_1*/ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2,INV },
+/*JIS_2*/ { J2_ESC,JIS, JIS, JIS, JIS, JIS, INV, JIS, JIS },
+/*J_ESC*/ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
+/*J_ESC_BR*/{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
+/*J2_ESC*/ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
+/*J2_ESC_BR*/{INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
+};
+
+static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
+/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
+/*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
+/*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
+/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
+/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
+/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
+/*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
+/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
+/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
+/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
+/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
+};
+
+
+char *literal_codeset = NULL;
+
+int
+local_mbtowc (pwc, s, n)
+ wchar_t *pwc;
+ const char *s;
+ size_t n;
+{
+ static JIS_STATE save_state = ASCII;
+ JIS_STATE curr_state = save_state;
+ unsigned char *t = (unsigned char *)s;
+
+ if (s != NULL && n == 0)
+ return -1;
+
+ if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
+ {
+ /* This must be the "C" locale or unknown locale -- fall thru */
+ }
+ else if (! strcmp (literal_codeset, "C-SJIS"))
+ {
+ int char1;
+ if (s == NULL)
+ return 0; /* not state-dependent */
+ char1 = *t;
+ if (ISSJIS1 (char1))
+ {
+ int char2 = t[1];
+ if (n <= 1)
+ return -1;
+ if (ISSJIS2 (char2))
+ {
+ if (pwc != NULL)
+ *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+ return 2;
+ }
+ return -1;
+ }
+ if (pwc != NULL)
+ *pwc = (wchar_t)*t;
+ if (*t == '\0')
+ return 0;
+ return 1;
+ }
+ else if (! strcmp (literal_codeset, "C-EUCJP"))
+ {
+ int char1;
+ if (s == NULL)
+ return 0; /* not state-dependent */
+ char1 = *t;
+ if (ISEUCJP (char1))
+ {
+ int char2 = t[1];
+ if (n <= 1)
+ return -1;
+ if (ISEUCJP (char2))
+ {
+ if (pwc != NULL)
+ *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+ return 2;
+ }
+ return -1;
+ }
+ if (pwc != NULL)
+ *pwc = (wchar_t)*t;
+ if (*t == '\0')
+ return 0;
+ return 1;
+ }
+ else if (! strcmp (literal_codeset, "C-JIS"))
+ {
+ JIS_ACTION action;
+ JIS_CHAR_TYPE ch;
+ unsigned char *ptr;
+ int i, curr_ch;
+
+ if (s == NULL)
+ {
+ save_state = ASCII;
+ return 1; /* state-dependent */
+ }
+
+ ptr = t;
+
+ for (i = 0; i < n; ++i)
+ {
+ curr_ch = t[i];
+ switch (curr_ch)
+ {
+ case JIS_ESC_CHAR:
+ ch = ESCAPE;
+ break;
+ case '$':
+ ch = DOLLAR;
+ break;
+ case '@':
+ ch = AT;
+ break;
+ case '(':
+ ch = BRACKET;
+ break;
+ case 'B':
+ ch = B;
+ break;
+ case 'J':
+ ch = J;
+ break;
+ case '\0':
+ ch = NUL;
+ break;
+ default:
+ if (ISJIS (curr_ch))
+ ch = JIS_CHAR;
+ else
+ ch = OTHER;
+ }
+
+ action = JIS_action_table[curr_state][ch];
+ curr_state = JIS_state_table[curr_state][ch];
+
+ switch (action)
+ {
+ case NOOP:
+ break;
+ case EMPTY:
+ if (pwc != NULL)
+ *pwc = (wchar_t)0;
+ save_state = curr_state;
+ return i;
+ case COPYA:
+ if (pwc != NULL)
+ *pwc = (wchar_t)*ptr;
+ save_state = curr_state;
+ return (i + 1);
+ case COPYJ:
+ if (pwc != NULL)
+ *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+ save_state = curr_state;
+ return (i + 1);
+ case COPYJ2:
+ if (pwc != NULL)
+ *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+ save_state = curr_state;
+ return (ptr - t) + 2;
+ case MAKE_A:
+ case MAKE_J:
+ ptr = (char *)(t + i + 1);
+ break;
+ case ERROR:
+ default:
+ return -1;
+ }
+ }
+
+ return -1; /* n < bytes needed */
+ }
+
+#ifdef CROSS_COMPILE
+ if (s == NULL)
+ return 0; /* not state-dependent */
+ if (pwc != NULL)
+ *pwc = *s;
+ return 1;
+#else
+ /* This must be the "C" locale or unknown locale. */
+ return mbtowc (pwc, s, n);
+#endif
+}
+
+int
+local_mblen (s, n)
+ const char *s;
+ size_t n;
+{
+ return local_mbtowc (NULL, s, n);
+}
+
+int
+local_mb_cur_max ()
+{
+ if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
+ ;
+ else if (! strcmp (literal_codeset, "C-SJIS"))
+ return 2;
+ else if (! strcmp (literal_codeset, "C-EUCJP"))
+ return 2;
+ else if (! strcmp (literal_codeset, "C-JIS"))
+ return 8; /* 3 + 2 + 3 */
+
+#ifdef CROSS_COMPILE
+ return 1;
+#else
+ return MB_CUR_MAX;
+#endif
+}
+#endif /* MULTIBYTE_CHARS */
diff --git a/gcc/mbchar.h b/gcc/mbchar.h
new file mode 100644
index 0000000..a4b82c0
--- /dev/null
+++ b/gcc/mbchar.h
@@ -0,0 +1,25 @@
+/* mbchar.h - Various declarations for functions found in mbchar.c
+ Copyright (C) 1998 Free Software Foundation, Inc.
+ */
+
+#ifndef __GCC_MBCHAR_H__
+#define __GCC_MBCHAR_H__
+
+#ifdef MULTIBYTE_CHARS
+/* escape character used for JIS encoding */
+#define JIS_ESC_CHAR 0x1b
+
+#define ISSJIS1(c) ((c) >= 0x81 && (c) <= 0x9f || (c) >= 0xe0 && (c) <= 0xef)
+#define ISSJIS2(c) ((c) >= 0x40 && (c) <= 0x7e || (c) >= 0x80 && (c) <= 0xfc)
+#define ISEUCJP(c) ((c) >= 0xa1 && (c) <= 0xfe)
+#define ISJIS(c) ((c) >= 0x21 && (c) <= 0x7e)
+
+int local_mbtowc PROTO ((wchar_t *, const char *, size_t));
+int local_mblen PROTO ((const char *, size_t));
+int local_mb_cur_max PROTO ((void));
+
+/* The locale being used for multibyte characters in string/char literals. */
+extern char *literal_codeset;
+#endif /* MULTIBYTE_CHARS */
+
+#endif /* __GCC_MBCHAR_H__ */