configure.in (enable_c_mbchar): New configure option.

Mon Jul 20 16:16:38 1998 Dave Brolley <brolley@cygnus.com> * configure.in (enable_c_mbchar): New configure option. (extra_cpp_objs): Always available now. * cexp.y (mbchar.h): #include it. (yylex): Handle Multibyte characters in character literals. * cccp.c (mbchar.h): #include it. (main): Set character set based on LANG environment variable. (rescan): Handle multibyte characters in comments. (skip_if_group): See above. (validate_else): See above. (skip_to_end_of_comment): See above. (macarg1): See above. (discard_comments): See above. (rescan): Handle multibyte characters in string and character literals. (collect_expansion): See above. (skip_quoted_string): See above. (macroexpand): See above. (macarg1): See above. (discard_comments): See above. (change_newlines): See above. * c-lex.c (mbchar.h): #include it. (GET_ENVIRONMENT): New macro. (init_lex): Set character set based on LANG environment variable. (yylex): Handle multibyte characters in character literals. (yylex): Handle multibyte characters in string literals. * Makefile.in (mbchar.o): New target. (cccp$(exeext)): @extra_cpp_objs@ is always available. (cppmain$(exeext)): @extra_cpp_objs@ is always available. * mbchar.[ch]: New files for multibyte character handling. From-SVN: r21303
author: Dave Brolley <brolley@cygnus.com> 1998-07-20 13:35:38 +0000
committer: Dave Brolley <brolley@gcc.gnu.org> 1998-07-20 09:35:38 -0400
commit: 56f48ce9765aa2b6d4742a4923fee581a12c1418 (patch)
tree: 671dda9b98d314335b4825d12e41d06427ebd8ce
parent: 689fcba8611f93ce241fa090a0423e8d35324027 (diff)
download: gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.zip
gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.tar.gz
gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.tar.bz2
9 files changed, 987 insertions, 156 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b2867b1..b5f465a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,39 @@
+Mon Jul 20 16:16:38 1998  Dave Brolley  <brolley@cygnus.com>
+
+	* configure.in (enable_c_mbchar): New configure option.
+	(extra_cpp_objs): Always available now.
+
+	* cexp.y (mbchar.h): #include it.
+	(yylex): Handle Multibyte characters in character literals.
+
+	* cccp.c (mbchar.h): #include it.
+	(main): Set character set based on LANG environment variable.
+	(rescan): Handle multibyte characters in comments.
+	(skip_if_group): See above.
+	(validate_else): See above.
+	(skip_to_end_of_comment): See above.
+	(macarg1): See above.
+	(discard_comments): See above.
+	(rescan): Handle multibyte characters in string and character literals.
+	(collect_expansion): See above.
+	(skip_quoted_string): See above.
+	(macroexpand): See above.
+	(macarg1): See above.
+	(discard_comments): See above.
+	(change_newlines): See above.
+
+	* c-lex.c (mbchar.h): #include it.
+	(GET_ENVIRONMENT): New macro.
+	(init_lex): Set character set based on LANG environment variable.
+	(yylex): Handle multibyte characters in character literals.
+	(yylex): Handle multibyte characters in string literals.
+
+	* Makefile.in (mbchar.o): New target.
+	(cccp$(exeext)): @extra_cpp_objs@ is always available.
+	(cppmain$(exeext)): @extra_cpp_objs@ is always available.
+
+	* mbchar.[ch]: New files for multibyte character handling.
+
 Mon Jul 20 01:11:11 1998  David S. Miller  <davem@pierdol.cobaltmicro.com>
 
 	* jump.c (jump_optimize): When simplifying noop moves and
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 39784e1..ea0a0b5 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -641,7 +641,8 @@ OBJS = toplev.o version.o tree.o print-tree.o stor-layout.o fold-const.o \
  regclass.o local-alloc.o global.o reload.o reload1.o caller-save.o gcse.o \
  insn-peep.o reorg.o $(SCHED_PREFIX)sched.o final.o recog.o reg-stack.o \
  insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o \
- profile.o insn-attrtab.o $(out_object_file) getpwd.o $(EXTRA_OBJS) convert.o
+ profile.o insn-attrtab.o $(out_object_file) getpwd.o $(EXTRA_OBJS) convert.o \
+ mbchar.o
 
 # GEN files are listed separately, so they can be built before doing parallel
 #  makes for cc1 or cc1plus.  Otherwise sequent parallel make attempts to load
@@ -1275,13 +1276,14 @@ c-lang.o : c-lang.c $(CONFIG_H) system.h $(TREE_H) c-tree.h c-lex.h toplev.h \
     output.h
 c-lex.o : c-lex.c $(CONFIG_H) system.h $(TREE_H) $(RTL_H) c-lex.h c-tree.h \
     $(srcdir)/c-parse.h input.h flags.h $(srcdir)/c-gperf.h c-pragma.h \
-    toplev.h output.h
+    toplev.h output.h mbchar.h
 c-aux-info.o : c-aux-info.c  $(CONFIG_H) system.h $(TREE_H) c-tree.h flags.h
 c-convert.o : c-convert.c $(CONFIG_H) system.h $(TREE_H) flags.h toplev.h
 c-pragma.o: c-pragma.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) except.h \
     function.h defaults.h c-pragma.h toplev.h
 c-iterate.o: c-iterate.c $(CONFIG_H) system.h $(TREE_H) $(RTL_H) c-tree.h \
     flags.h toplev.h $(EXPR_H)
+mbchar.o: $(CONFIG_H) system.h gansidecl.h mbchar.h
 
 collect2$(exeext): collect2.o tlink.o hash.o cplus-dem.o underscore.o \
 	version.o choose-temp.o mkstemp.o $(LIBDEPS)
@@ -1816,15 +1818,16 @@ $(HOST_PREFIX_1):
 cpp$(exeext): $(CCCP)$(exeext)
 	-rm -f cpp$(exeext)
 	$(LN) $(CCCP)$(exeext) cpp$(exeext)
-cccp$(exeext): cccp.o cexp.o version.o prefix.o $(LIBDEPS)
-	$(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cccp.o cexp.o prefix.o \
-	  version.o $(LIBS)
+cccp$(exeext): cccp.o cexp.o version.o prefix.o mbchar.o @extra_cpp_objs@ $(LIBDEPS)
+	$(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cccp.o cexp.o prefix.o mbchar.o \
+	  version.o @extra_cpp_objs@ $(LIBS)
 cexp.o: $(srcdir)/cexp.c $(CONFIG_H) system.h gansidecl.h
 	$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) -c $(srcdir)/cexp.c
 $(srcdir)/cexp.c: $(srcdir)/cexp.y
 	cd $(srcdir); $(BISON) -o cexp.c cexp.y
 
-cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h
+cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h \
+           mbchar.h
 	$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
 	  -DGCC_INCLUDE_DIR=\"$(libsubdir)/include\" \
 	  -DGPLUSPLUS_INCLUDE_DIR=\"$(gxx_include_dir)\" \
@@ -1835,8 +1838,9 @@ cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h
 	  -c `echo $(srcdir)/cccp.c | sed 's,^\./,,'`
 
 cppmain$(exeext): cppmain.o cpplib.o cpphash.o cppalloc.o cpperror.o cppexp.o \
-  prefix.o version.o $(LIBDEPS)
+  prefix.o version.o mbchar.o @extra_cpp_objs@ $(LIBDEPS)
 	$(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cppmain.o cpplib.o cpphash.o \
+	  mbchar.o @extra_cpp_objs@ \
 	  cppalloc.o cpperror.o cppexp.o prefix.o version.o $(LIBS)
 
 cppmain.o: cppmain.c $(CONFIG_H) cpplib.h system.h gansidecl.h
diff --git a/gcc/c-lex.c b/gcc/c-lex.c
index f82ad76..f4f4a12 100644
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -33,16 +33,14 @@ Boston, MA 02111-1307, USA.  */
 #include "c-pragma.h"
 #include "toplev.h"
 
-/* MULTIBYTE_CHARS support only works for native compilers.
-   ??? Ideally what we want is to model widechar support after
-   the current floating point support.  */
-#ifdef CROSS_COMPILE
-#undef MULTIBYTE_CHARS
-#endif
-
 #ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
 #include <locale.h>
+
+#ifndef GET_ENVIRONMENT
+#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
 #endif
+#endif /* MULTIBYTE_CHARS */
 
 #if USE_CPPLIB
 #include "cpplib.h"
@@ -232,6 +230,7 @@ init_lex ()
 #ifdef MULTIBYTE_CHARS
   /* Change to the native locale for multibyte conversions.  */
   setlocale (LC_CTYPE, "");
+  GET_ENVIRONMENT (literal_codeset, "LANG");
 #endif
 
   maxtoken = 40;
@@ -1795,30 +1794,27 @@ yylex ()
       {
 	register int result = 0;
 	register int num_chars = 0;
+	int chars_seen = 0;
 	unsigned width = TYPE_PRECISION (char_type_node);
 	int max_chars;
-
-	if (wide_flag)
-	  {
-	    width = WCHAR_TYPE_SIZE;
 #ifdef MULTIBYTE_CHARS
-	    max_chars = MB_CUR_MAX;
-#else
-	    max_chars = 1;
+	int longest_char = local_mb_cur_max ();
+	(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
 #endif
-	  }
-	else
-	  max_chars = TYPE_PRECISION (integer_type_node) / width;
+
+	max_chars = TYPE_PRECISION (integer_type_node) / width;
+	if (wide_flag)
+	  width = WCHAR_TYPE_SIZE;
 
 	while (1)
 	  {
 	  tryagain:
-
 	    c = GETC();
 
 	    if (c == '\'' || c == EOF)
 	      break;
 
+	    ++chars_seen;
 	    if (c == '\\')
 	      {
 		int ignore = 0;
@@ -1839,18 +1835,76 @@ yylex ()
 		  pedwarn ("ANSI C forbids newline in character constant");
 		lineno++;
 	      }
-#ifdef MAP_CHARACTER
 	    else
-	      c = MAP_CHARACTER (c);
+	      {
+#ifdef MULTIBYTE_CHARS
+		wchar_t wc;
+		int i;
+		int char_len = -1;
+		for (i = 1; i <= longest_char; ++i)
+		  {
+		    if (i > maxtoken - 4)
+		      extend_token_buffer (token_buffer);
+
+		    token_buffer[i] = c;
+		    char_len = local_mbtowc (& wc,
+					     token_buffer + 1,
+					     i);
+		    if (char_len != -1)
+		      break;
+		    c = GETC ();
+		  }
+		if (char_len > 1)
+		  {
+		    /* mbtowc sometimes needs an extra char before accepting */
+		    if (char_len < i)
+		      UNGETC (c);
+		    if (! wide_flag)
+		      {
+			/* Merge character into result; ignore excess chars.  */
+			for (i = 1; i <= char_len; ++i)
+			  {
+			    if (i > max_chars)
+			      break;
+			    if (width < HOST_BITS_PER_INT)
+			      result = (result << width)
+				| (token_buffer[i]
+				   & ((1 << width) - 1));
+			    else
+			      result = token_buffer[i];
+			  }
+			num_chars += char_len;
+			goto tryagain;
+		      }
+		    c = wc;
+		  }
+		else
+		  {
+		    if (char_len == -1)
+		      warning ("Ignoring invalid multibyte character");
+		    if (wide_flag)
+		      c = wc;
+#ifdef MAP_CHARACTER
+		    else
+		      c = MAP_CHARACTER (c);
 #endif
+		  }
+#else /* ! MULTIBYTE_CHARS */
+#ifdef MAP_CHARACTER
+		c = MAP_CHARACTER (c);
+#endif
+#endif /* ! MULTIBYTE_CHARS */
+	      }
 
-	    num_chars++;
-	    if (num_chars > maxtoken - 4)
-	      extend_token_buffer (token_buffer);
-
-	    token_buffer[num_chars] = c;
+	    if (wide_flag)
+	      {
+		if (chars_seen == 1) /* only keep the first one */
+		  result = c;
+		goto tryagain;
+	      }
 
 	    /* Merge character into result; ignore excess chars.  */
+	    num_chars += (width / TYPE_PRECISION (char_type_node));
 	    if (num_chars < max_chars + 1)
 	      {
 		if (width < HOST_BITS_PER_INT)
@@ -1860,19 +1914,16 @@ yylex ()
 	      }
 	  }
 
-	token_buffer[num_chars + 1] = '\'';
-	token_buffer[num_chars + 2] = 0;
-
 	if (c != '\'')
 	  error ("malformatted character constant");
-	else if (num_chars == 0)
+	else if (chars_seen == 0)
 	  error ("empty character constant");
 	else if (num_chars > max_chars)
 	  {
 	    num_chars = max_chars;
 	    error ("character constant too long");
 	  }
-	else if (num_chars != 1 && ! flag_traditional && warn_multichar)
+	else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
 	  warning ("multi-character character constant");
 
 	/* If char type is signed, sign-extend the constant.  */
@@ -1897,22 +1948,6 @@ yylex ()
 	  }
 	else
 	  {
-#ifdef MULTIBYTE_CHARS
-	    /* Set the initial shift state and convert the next sequence.  */
-	    result = 0;
-	    /* In all locales L'\0' is zero and mbtowc will return zero,
-	       so don't use it.  */
-	    if (num_chars > 1
-		|| (num_chars == 1 && token_buffer[1] != '\0'))
-	      {
-		wchar_t wc;
-		(void) mbtowc (NULL_PTR, NULL_PTR, 0);
-		if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
-		  result = wc;
-		else
-		  warning ("Ignoring invalid multibyte character");
-	      }
-#endif
 	    yylval.ttype = build_int_2 (result, 0);
 	    TREE_TYPE (yylval.ttype) = wchar_type_node;
 	  }
@@ -1924,7 +1959,13 @@ yylex ()
     case '"':
     string_constant:
       {
-	c = GETC();
+	unsigned width = wide_flag ? WCHAR_TYPE_SIZE
+	                           : TYPE_PRECISION (char_type_node);
+#ifdef MULTIBYTE_CHARS
+	int longest_char = local_mb_cur_max ();
+	(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
+#endif
+	c = GETC ();
 	p = token_buffer + 1;
 
 	while (c != '"' && c >= 0)
@@ -1935,9 +1976,8 @@ yylex ()
 		c = readescape (&ignore);
 		if (ignore)
 		  goto skipnewline;
-		if (!wide_flag
-		    && TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT
-		    && c >= (1 << TYPE_PRECISION (char_type_node)))
+		if (width < HOST_BITS_PER_INT
+		    && (unsigned) c >= (1 << width))
 		  pedwarn ("escape sequence out of range for character");
 	      }
 	    else if (c == '\n')
@@ -1946,15 +1986,94 @@ yylex ()
 		  pedwarn ("ANSI C forbids newline in string constant");
 		lineno++;
 	      }
+	    else
+	      {
+#ifdef MULTIBYTE_CHARS
+		wchar_t wc;
+		int i;
+		int char_len = -1;
+		for (i = 0; i < longest_char; ++i)
+		  {
+		    if (p + i == token_buffer + maxtoken)
+		      p = extend_token_buffer (p);
+		    p[i] = c;
 
-	    if (p == token_buffer + maxtoken)
-	      p = extend_token_buffer (p);
-	    *p++ = c;
+		    char_len = local_mbtowc (& wc, p, i + 1);
+		    if (char_len != -1)
+		      break;
+		    c = GETC ();
+		  }
+		if (char_len == -1)
+		  warning ("Ignoring invalid multibyte character");
+		else
+		  {
+		    /* mbtowc sometimes needs an extra char before accepting */
+		    if (char_len <= i)
+		      UNGETC (c);
+		    if (wide_flag)
+		      {
+			*(wchar_t *)p = wc;
+			p += sizeof (wc);
+		      }
+		    else
+		      p += (i + 1);
+		    c = GETC ();
+		    continue;
+		  }
+#endif /* MULTIBYTE_CHARS */
+	      }
+
+	    /* Add this single character into the buffer either as a wchar_t
+	       or as a single byte.  */
+	    if (wide_flag)
+	      {
+		unsigned width = TYPE_PRECISION (char_type_node);
+		unsigned bytemask = (1 << width) - 1;
+		int byte;
+
+		if (p + WCHAR_BYTES >= token_buffer + maxtoken)
+		  p = extend_token_buffer (p);
+
+		for (byte = 0; byte < WCHAR_BYTES; ++byte)
+		  {
+		    int value;
+		    if (byte >= sizeof (c))
+		      value = 0;
+		    else
+		      value = (c >> (byte * width)) & bytemask;
+		    if (BYTES_BIG_ENDIAN)
+		      p[WCHAR_BYTES - byte - 1] = value;
+		    else
+		      p[byte] = value;
+		  }
+		p += WCHAR_BYTES;
+	      }
+	    else
+	      {
+		if (p == token_buffer + maxtoken)
+		  p = extend_token_buffer (p);
+		*p++ = c;
+	      }
 
 	  skipnewline:
-	    c = GETC();
+	    c = GETC ();
+	  }
+
+	/* Terminate the string value, either with a single byte zero
+	   or with a wide zero.  */
+	if (wide_flag)
+	  {
+	    if (p + WCHAR_BYTES >= token_buffer + maxtoken)
+	      p = extend_token_buffer (p);
+	    bzero (p, WCHAR_BYTES);
+	    p += WCHAR_BYTES;
+	  }
+	else
+	  {
+	    if (p == token_buffer + maxtoken)
+	      p = extend_token_buffer (p);
+	    *p++ = 0;
 	  }
-	*p = 0;
 
 	if (c < 0)
 	  error ("Unterminated string constant");
@@ -1964,52 +2083,27 @@ yylex ()
 
 	if (wide_flag)
 	  {
-	    /* If this is a L"..." wide-string, convert the multibyte string
-	       to a wide character string.  */
-	    char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES);
-	    int len;
-
-#ifdef MULTIBYTE_CHARS
-	    len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
-	    if (len < 0 || len >= (p - token_buffer))
-	      {
-		warning ("Ignoring invalid multibyte string");
-		len = 0;
-	      }
-	    bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
-#else
-	    {
-	      char *wp, *cp;
-
-	      wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0);
-	      bzero (widep, (p - token_buffer) * WCHAR_BYTES);
-	      for (cp = token_buffer + 1; cp < p; cp++)
-		*wp = *cp, wp += WCHAR_BYTES;
-	      len = p - token_buffer - 1;
-	    }
-#endif
-	    yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
+	    yylval.ttype = build_string (p - (token_buffer + 1),
+					 token_buffer + 1);
 	    TREE_TYPE (yylval.ttype) = wchar_array_type_node;
 	    value = STRING;
 	  }
 	else if (objc_flag)
 	  {
 	    /* Return an Objective-C @"..." constant string object.  */
-	    yylval.ttype = build_objc_string (p - token_buffer,
+	    yylval.ttype = build_objc_string (p - (token_buffer + 1),
 					      token_buffer + 1);
 	    TREE_TYPE (yylval.ttype) = char_array_type_node;
 	    value = OBJC_STRING;
 	  }
 	else
 	  {
-	    yylval.ttype = build_string (p - token_buffer, token_buffer + 1);
+	    yylval.ttype = build_string (p - (token_buffer + 1),
+					 token_buffer + 1);
 	    TREE_TYPE (yylval.ttype) = char_array_type_node;
 	    value = STRING;
 	  }
 
-	*p++ = '"';
-	*p = 0;
-
 	break;
       }
 
diff --git a/gcc/cccp.c b/gcc/cccp.c
index 1bd7649..55b6e68 100644
--- a/gcc/cccp.c
+++ b/gcc/cccp.c
@@ -45,6 +45,11 @@ typedef unsigned char U_CHAR;
 #include "gansidecl.h"
 #include "pcp.h"
 
+#ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
+#include <locale.h>
+#endif /* MULTIBYTE_CHARS */
+
 #ifndef GET_ENVIRONMENT
 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ENV_VALUE = getenv (ENV_NAME)
 #endif
@@ -1308,6 +1313,12 @@ main (argc, argv)
   bzero ((char *) pend_assertions, argc * sizeof (char *));
   bzero ((char *) pend_includes, argc * sizeof (char *));
 
+#ifdef MULTIBYTE_CHARS
+  /* Change to the native locale for multibyte conversions.  */
+  setlocale (LC_CTYPE, "");
+  GET_ENVIRONMENT (literal_codeset, "LANG");
+#endif
+
   /* Process switches and find input file name.  */
 
   for (i = 1; i < argc; i++) {
@@ -2774,9 +2785,27 @@ do { ip = &instack[indepth];		\
 	      bp += 2;
 	    else if (*bp == '/' && bp[1] == '*') {
 	      bp += 2;
-	      while (!(*bp == '*' && bp[1] == '/'))
-		bp++;
-	      bp += 2;
+	      while (1)
+		{
+		  if (*bp == '*')
+		    {
+		      if (bp[1] == '/')
+			{
+			  bp += 2;
+			  break;
+			}
+		    }
+		  else
+		    {
+#ifdef MULTIBYTE_CHARS
+		      int length;
+		      length = local_mblen (bp, limit - bp);
+		      if (length > 1)
+			bp += (length - 1);
+#endif
+		    }
+		  bp++;
+		}
 	    }
 	    /* There is no point in trying to deal with C++ // comments here,
 	       because if there is one, then this # must be part of the
@@ -2937,6 +2966,24 @@ do { ip = &instack[indepth];		\
 	  if (ibp[-1] == c)
 	    goto while2end;
 	  break;
+#ifdef MULTIBYTE_CHARS
+	default:
+	  {
+	    int length;
+	    --ibp;
+	    length = local_mblen (ibp, limit - ibp);
+	    if (length > 0)
+	      {
+		--obp;
+		bcopy (ibp, obp, length);
+		obp += length;
+		ibp += length;
+	      }
+	    else
+	      ++ibp;
+	  }
+	  break;
+#endif
 	}
       }
     while2end:
@@ -2983,6 +3030,15 @@ do { ip = &instack[indepth];		\
 		*obp++ = '\n';
 	      ++op->lineno;
 	    }
+	    else
+	      {
+#ifdef MULTIBYTE_CHARS
+		int length;
+		length = local_mblen (ibp, limit - ibp);
+		if (length > 1)
+		  ibp += (length - 1);
+#endif
+	      }
 	  }
 	  break;
 	}
@@ -3071,6 +3127,16 @@ do { ip = &instack[indepth];		\
 	      goto limit_reached;
 	    }
 	    break;
+#ifdef MULTIBYTE_CHARS
+	  default:
+	    {
+	      int length;
+	      length = local_mblen (ibp, limit - ibp);
+	      if (length > 1)
+		ibp += (length - 1);
+	    }
+	    break;
+#endif
 	  }
 	}
       comment_end:
@@ -3433,11 +3499,27 @@ randomchar:
 			      break;
 			    }
 			  }
-			  if (*ibp == '\n') {
+			  else if (*ibp == '\n') {
 			    /* Newline in a file.  Count it.  */
 			    ++ip->lineno;
 			    ++op->lineno;
 			  }
+			  else
+			    {
+#ifdef MULTIBYTE_CHARS
+			      int length;
+			      length = local_mblen (ibp, limit - ibp);
+			      if (length > 1)
+				{
+				  if (put_out_comments)
+				    {
+				      bcopy (ibp, obp, length - 1);
+				      obp += length - 1;
+				    }
+				  ibp += (length - 1);
+				}
+#endif
+			    }
 			  if (put_out_comments)
 			    *obp++ = *ibp;
 			}
@@ -3448,9 +3530,32 @@ randomchar:
 			} else if (! traditional) {
 			  *obp++ = ' ';
 			}
-			for (ibp += 2; *ibp != '\n' || ibp[-1] == '\\'; ibp++)
-			  if (put_out_comments)
-			    *obp++ = *ibp;
+			for (ibp += 2; ; ibp++)
+			  {
+			    if (*ibp == '\n')
+			      {
+				if (ibp[-1] != '\\')
+				  break;
+			      }
+			    else
+			      {
+#ifdef MULTIBYTE_CHARS
+				int length;
+				length = local_mblen (ibp, limit - ibp);
+				if (length > 1)
+				  {
+				    if (put_out_comments)
+				      {
+					bcopy (ibp, obp, length - 1);
+					obp += length - 1;
+				      }
+				    ibp += (length - 1);
+				  }
+#endif
+			      }
+			    if (put_out_comments)
+			      *obp++ = *ibp;
+			  }
 		      } else
 			break;
 		    }
@@ -6186,6 +6291,25 @@ collect_expansion (buf, end, nargs, arglist)
       }
     }
 
+#ifdef MULTIBYTE_CHARS
+    /* Handle multibyte characters inside string and character literals.  */
+    if (expected_delimiter != '\0')
+      {
+	int length;
+	--p;
+	length = local_mblen (p, limit - p);
+	if (length > 1)
+	  {
+	    --exp_p;
+	    bcopy (p, exp_p, length);
+	    p += length;
+	    exp_p += length;
+	    continue;
+	  }
+	++p;
+      }
+#endif
+
     /* Handle the start of a symbol.  */
     if (is_idchar[c] && nargs > 0) {
       U_CHAR *id_beg = p - 1;
@@ -7412,9 +7536,27 @@ skip_if_group (ip, any, op)
 	    bp += 2;
 	  else if (*bp == '/' && bp[1] == '*') {
 	    bp += 2;
-	    while (!(*bp == '*' && bp[1] == '/'))
-	      bp++;
-	    bp += 2;
+	    while (1)
+	      {
+		if (*bp == '*')
+		  {
+		    if (bp[1] == '/')
+		      {
+			bp += 2;
+			break;
+		      }
+		  }
+		else
+		  {
+#ifdef MULTIBYTE_CHARS
+		    int length;
+		    length = local_mblen (bp, endb - bp);
+		    if (length > 1)
+		      bp += (length - 1);
+#endif
+		  }
+		bp++;
+	      }
 	  }
 	  /* There is no point in trying to deal with C++ // comments here,
 	     because if there is one, then this # must be part of the
@@ -7458,6 +7600,15 @@ skip_if_group (ip, any, op)
 		if (bp[1] == '/')
 		  break;
 	      }
+	      else
+		{
+#ifdef MULTIBYTE_CHARS
+		  int length;
+		  length = local_mblen (bp, endb - bp);
+		  if (length > 1)
+		    bp += (length - 1);
+#endif
+		}
 	    }
 	    bp += 2;
 	  } else if (bp[1] == '/' && cplusplus_comments) {
@@ -7469,6 +7620,15 @@ skip_if_group (ip, any, op)
 		  warning ("multiline `//' comment");
 		ip->lineno++;
 	      }
+	      else
+		{
+#ifdef MULTIBYTE_CHARS
+		  int length;
+		  length = local_mblen (bp, endb - bp);
+		  if (length > 1)
+		    bp += (length - 1);
+#endif
+		}
 	    }
 	  } else
 	    break;
@@ -7764,6 +7924,15 @@ validate_else (p, limit)
 	      break;
 	    }
 	  }
+	  else
+	    {
+#ifdef MULTIBYTE_CHARS
+	      int length;
+	      length = local_mblen (p, limit - p);
+	      if (length > 1)
+		p += (length - 1);
+#endif
+	    }
 	}
       }
       else if (cplusplus_comments && p[1] == '/')
@@ -7817,6 +7986,22 @@ skip_to_end_of_comment (ip, line_counter, nowarn)
 	if (op)
 	  ++op->lineno;
       }
+      else
+	{
+#ifdef MULTIBYTE_CHARS
+	  int length;
+	  length = local_mblen (bp, limit - bp);
+	  if (length > 1)
+	    {
+	      if (op)
+		{
+		  bcopy (bp, op->bufp, length - 1);
+		  op->bufp += (length - 1);
+		}
+	      bp += (length - 1);
+	    }
+#endif
+	}
       if (op)
 	*op->bufp++ = *bp;
     }
@@ -7854,6 +8039,23 @@ skip_to_end_of_comment (ip, line_counter, nowarn)
 	return bp;
       }
       break;
+#ifdef MULTIBYTE_CHARS
+    default:
+      {
+	int length;
+	bp--;
+	length = local_mblen (bp, limit - bp);
+	if (length <= 0)
+	  length = 1;
+	if (op)
+	  {
+	    op->bufp--;
+	    bcopy (bp, op->bufp, length);
+	    op->bufp += length;
+	  }
+	bp += length;
+      }
+#endif
     }
   }
 
@@ -7944,6 +8146,16 @@ skip_quoted_string (bp, limit, start_line, count_newlines, backslash_newlines_p,
       }
     } else if (c == match)
       break;
+#ifdef MULTIBYTE_CHARS
+    {
+      int length;
+      --bp;
+      length = local_mblen (bp, limit - bp);
+      if (length <= 0)
+	length = 1;
+      bp += length;
+    }
+#endif
   }
   return bp;
 }
@@ -8381,9 +8593,23 @@ macroexpand (hp, op)
 	    else {
 	      if (c == '\\')
 		escaped = 1;
-	      if (in_string) {
+	      else if (in_string) {
 		if (c == in_string)
 		  in_string = 0;
+		else
+		  {
+#ifdef MULTIBYTE_CHARS
+		    int length;
+		    length = local_mblen (arg->raw + i, arglen - i);
+		    if (length > 1)
+		      {
+			bcopy (arg->raw + i, xbuf + totlen, length);
+			i += length - 1;
+			totlen += length;
+			continue;
+		      }
+#endif
+		  }
 	      } else if (c == '\"' || c == '\'')
 		in_string = c;
 	    }
@@ -8717,6 +8943,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
 	      break;
 	    }
 	  }
+	  else
+	    {
+#ifdef MULTIBYTE_CHARS
+	      int length;
+	      length = local_mblen (bp, limit - bp);
+	      if (length > 1)
+		bp += (length - 1);
+#endif
+	    }
 	}
       } else if (bp[1] == '/' && cplusplus_comments) {
 	*comments = 1;
@@ -8728,6 +8963,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
 	    if (warn_comments)
 	      warning ("multiline `//' comment");
 	  }
+	  else
+	    {
+#ifdef MULTIBYTE_CHARS
+	      int length;
+	      length = local_mblen (bp, limit - bp);
+	      if (length > 1)
+		bp += (length - 1);
+#endif
+	    }
 	}
       }
       break;
@@ -8751,6 +8995,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
 	    if (quotec == '\'')
 	      break;
 	  }
+	  else
+	    {
+#ifdef MULTIBYTE_CHARS
+	      int length;
+	      length = local_mblen (bp, limit - bp);
+	      if (length > 1)
+		bp += (length - 1);
+#endif
+	    }
 	}
       }
       break;
@@ -8828,8 +9081,23 @@ discard_comments (start, length, newlines)
 	/* Comments are equivalent to spaces.  */
 	obp[-1] = ' ';
 	ibp++;
-	while (ibp < limit && (*ibp != '\n' || ibp[-1] == '\\'))
-	  ibp++;
+	while (ibp < limit)
+	  {
+	    if (*ibp == '\n')
+	      {
+		if (ibp[-1] != '\\')
+		  break;
+	      }
+	    else
+	      {
+#ifdef MULTIBYTE_CHARS
+		int length = local_mblen (ibp, limit - ibp);
+		if (length > 1)
+		  ibp += (length - 1);
+#endif
+	      }
+	    ibp++;
+	  }
 	break;
       }
       if (ibp[0] != '*' || ibp + 1 >= limit)
@@ -8849,6 +9117,14 @@ discard_comments (start, length, newlines)
 	    break;
 	  }
 	}
+	else
+	  {
+#ifdef MULTIBYTE_CHARS
+	    int length = local_mblen (ibp, limit - ibp);
+	    if (length > 1)
+	      ibp += (length - 1);
+#endif
+	  }
       }
       break;
 
@@ -8863,9 +9139,12 @@ discard_comments (start, length, newlines)
 	  *obp++ = c = *ibp++;
 	  if (c == quotec)
 	    break;
-	  if (c == '\n' && quotec == '\'')
-	    break;
-	  if (c == '\\') {
+	  if (c == '\n')
+	    {
+	      if (quotec == '\'')
+		break;
+	    }
+	  else if (c == '\\') {
 	    if (ibp < limit && *ibp == '\n') {
 	      ibp++;
 	      obp--;
@@ -8876,6 +9155,23 @@ discard_comments (start, length, newlines)
 		*obp++ = *ibp++;
 	    }
 	  }
+	  else
+	    {
+#ifdef MULTIBYTE_CHARS
+	      int length;
+	      ibp--;
+	      length = local_mblen (ibp, limit - ibp);
+	      if (length > 1)
+		{
+		  obp--;
+		  bcopy (ibp, obp, length);
+		  ibp += length;
+		  obp += length;
+		}
+	      else
+		ibp++;
+#endif
+	    }
 	}
       }
       break;
@@ -8925,10 +9221,33 @@ change_newlines (start, length)
 	int quotec = c;
 	while (ibp < limit) {
 	  *obp++ = c = *ibp++;
-	  if (c == quotec && ibp[-2] != '\\')
-	    break;
-	  if (c == '\n' && quotec == '\'')
-	    break;
+	  if (c == quotec)
+	    {
+	      if (ibp[-2] != '\\')
+		break;
+	    }
+	  else if (c == '\n')
+	    {
+	      if (quotec == '\'')
+		break;
+	    }
+	  else
+	    {
+#ifdef MULTIBYTE_CHARS
+	      int length;
+	      ibp--;
+	      length = local_mblen (ibp, limit - ibp);
+	      if (length > 1)
+		{
+		  obp--;
+		  bcopy (ibp, obp, length);
+		  ibp += length;
+		  obp += length;
+		}
+	      else
+		ibp++;
+#endif
+	    }
 	}
       }
       break;
diff --git a/gcc/cexp.y b/gcc/cexp.y
index 6280aed..5d36329 100644
--- a/gcc/cexp.y
+++ b/gcc/cexp.y
@@ -39,12 +39,12 @@ Boston, MA 02111-1307, USA.
 #include "system.h"
 #include <setjmp.h>
 /* #define YYDEBUG 1 */
+#include "gansidecl.h"
 
 #ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
 #include <locale.h>
-#endif
-
-#include "gansidecl.h"
+#endif /* MULTIBYTE_CHARS */
 
 typedef unsigned char U_CHAR;
 
@@ -641,23 +641,18 @@ yylex ()
     {
       register HOST_WIDE_INT result = 0;
       register int num_chars = 0;
+      int chars_seen = 0;
       unsigned width = MAX_CHAR_TYPE_SIZE;
       int max_chars;
-      char *token_buffer;
-
-      if (wide_flag)
-	{
-	  width = MAX_WCHAR_TYPE_SIZE;
 #ifdef MULTIBYTE_CHARS
-	  max_chars = MB_CUR_MAX;
-#else
-	  max_chars = 1;
+      int longest_char = local_mb_cur_max ();
+      char *token_buffer = (char *) alloca (longest_char);
+      (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
 #endif
-	}
-      else
-	max_chars = MAX_LONG_TYPE_SIZE / width;
 
-      token_buffer = (char *) alloca (max_chars + 1);
+      max_chars = MAX_LONG_TYPE_SIZE / width;
+      if (wide_flag)
+	width = MAX_WCHAR_TYPE_SIZE;
 
       while (1)
 	{
@@ -666,44 +661,96 @@ yylex ()
 	  if (c == '\'' || c == EOF)
 	    break;
 
+	  ++chars_seen;
 	  if (c == '\\')
 	    {
 	      c = parse_escape (&lexptr, mask);
 	    }
+	  else
+	    {
+#ifdef MULTIBYTE_CHARS
+	      wchar_t wc;
+	      int i;
+	      int char_len = -1;
+	      for (i = 1; i <= longest_char; ++i)
+		{
+		  token_buffer[i - 1] = c;
+		  char_len = local_mbtowc (& wc, token_buffer, i);
+		  if (char_len != -1)
+		    break;
+		  c = *lexptr++;
+		}
+	      if (char_len > 1)
+		{
+		  /* mbtowc sometimes needs an extra char before accepting */
+		  if (char_len < i)
+		    lexptr--;
+		  if (! wide_flag)
+		    {
+		      /* Merge character into result; ignore excess chars.  */
+		      for (i = 1; i <= char_len; ++i)
+			{
+			  if (i > max_chars)
+			    break;
+			  if (width < HOST_BITS_PER_INT)
+			    result = (result << width)
+			      | (token_buffer[i - 1]
+				 & ((1 << width) - 1));
+			  else
+			    result = token_buffer[i - 1];
+			}
+		      num_chars += char_len;
+		      continue;
+		    }
+		}
+	      else
+		{
+		  if (char_len == -1)
+		    warning ("Ignoring invalid multibyte character");
+		}
+	      if (wide_flag)
+		c = wc;
+#endif /* MULTIBYTE_CHARS */
+	    }
 
-	  num_chars++;
+	  if (wide_flag)
+	    {
+	      if (chars_seen == 1) /* only keep the first one */
+		result = c;
+	      continue;
+	    }
 
 	  /* Merge character into result; ignore excess chars.  */
+	  num_chars++;
 	  if (num_chars <= max_chars)
 	    {
-	      if (width < HOST_BITS_PER_WIDE_INT)
-		result = (result << width) | c;
+	      if (width < HOST_BITS_PER_INT)
+		result = (result << width) | (c & ((1 << width) - 1));
 	      else
 		result = c;
-	      token_buffer[num_chars - 1] = c;
 	    }
 	}
 
-      token_buffer[num_chars] = 0;
-
       if (c != '\'')
 	error ("malformatted character constant");
-      else if (num_chars == 0)
+      else if (chars_seen == 0)
 	error ("empty character constant");
       else if (num_chars > max_chars)
 	{
 	  num_chars = max_chars;
 	  error ("character constant too long");
 	}
-      else if (num_chars != 1 && ! traditional)
+      else if (chars_seen != 1 && ! traditional)
 	warning ("multi-character character constant");
 
       /* If char type is signed, sign-extend the constant.  */
       if (! wide_flag)
 	{
 	  int num_bits = num_chars * width;
-
-	  if (lookup ((U_CHAR *) "__CHAR_UNSIGNED__",
+	  if (num_bits == 0)
+	    /* We already got an error; avoid invalid shift.  */
+	    yylval.integer.value = 0;
+	  else if (lookup ((U_CHAR *) "__CHAR_UNSIGNED__",
 		      sizeof ("__CHAR_UNSIGNED__") - 1, -1)
 	      || ((result >> (num_bits - 1)) & 1) == 0)
 	    yylval.integer.value
@@ -716,22 +763,6 @@ yylex ()
 	}
       else
 	{
-#ifdef MULTIBYTE_CHARS
-	  /* Set the initial shift state and convert the next sequence.  */
-	  result = 0;
-	  /* In all locales L'\0' is zero and mbtowc will return zero,
-	     so don't use it.  */
-	  if (num_chars > 1
-	      || (num_chars == 1 && token_buffer[0] != '\0'))
-	    {
-	      wchar_t wc;
-	      (void) mbtowc (NULL_PTR, NULL_PTR, 0);
-	      if (mbtowc (& wc, token_buffer, num_chars) == num_chars)
-		result = wc;
-	      else
-		pedwarn ("Ignoring invalid multibyte character");
-	    }
-#endif
 	  yylval.integer.value = result;
 	}
     }
diff --git a/gcc/configure.in b/gcc/configure.in
index 6791547..b4c1aca 100644
--- a/gcc/configure.in
+++ b/gcc/configure.in
@@ -84,7 +84,7 @@ AC_DEFINE(ENABLE_CHECKING)
 # Enable use of cpplib for C.
 cpp_main=cccp
 AC_ARG_ENABLE(c-cpplib,
-[  --enable-c-cpplib       Use cpplib for C.],
+[  --enable-c-cpplib       Use cpplib for C and C++.],
 if [[[ x$enable_c_cpplib != xno ]]]; then
   extra_c_objs="${extra_c_objs} cpplib.o cppexp.o cpphash.o cpperror.o"
   extra_c_objs="${extra_c_objs} prefix.o"
@@ -93,6 +93,13 @@ if [[[ x$enable_c_cpplib != xno ]]]; then
   cpp_main=cppmain
 fi)
   
+# Enable multibyte characters for C/C++; append so earlier flags are kept.
+AC_ARG_ENABLE(c-mbchar,
+[  --enable-c-mbchar       Enable multibyte characters for C and C++.],
+if [[[ x$enable_c_mbchar != xno ]]]; then
+  extra_c_flags="${extra_c_flags} -DMULTIBYTE_CHARS=1"
+fi)
+  
 # Enable Haifa scheduler.
 AC_ARG_ENABLE(haifa,
 [  --enable-haifa          Use the experimental scheduler.
@@ -193,6 +200,9 @@ AC_CHECK_FUNCS(strtoul bsearch strerror putenv popen bcopy bzero bcmp \
 	index rindex strchr strrchr kill getrlimit setrlimit atoll atoq \
 	sysconf isascii gettimeofday)
 
+# Make sure wchar_t is available
+#AC_CHECK_TYPE(wchar_t, unsigned int)
+
 GCC_FUNC_VFPRINTF_DOPRNT
 GCC_FUNC_PRINTF_PTR
 
@@ -3585,6 +3595,7 @@ AC_SUBST(extra_programs)
 AC_SUBST(extra_parts)
 AC_SUBST(extra_c_objs)
 AC_SUBST(extra_cxx_objs)
+AC_SUBST(extra_cpp_objs)
 AC_SUBST(extra_c_flags)
 AC_SUBST(extra_objs)
 AC_SUBST(host_extra_gcc_objs)
diff --git a/gcc/invoke.texi b/gcc/invoke.texi
index 8056b84..3b3ad43 100644
--- a/gcc/invoke.texi
+++ b/gcc/invoke.texi
@@ -5964,8 +5964,9 @@ the language standard.  You should not need to use these options yourself.
 @cindex environment variables
 
 This section describes several environment variables that affect how GNU
-CC operates.  They work by specifying directories or prefixes to use
-when searching for various kinds of files.
+CC operates.  Some of them work by specifying directories or prefixes to use
+when searching for various kinds of files.  Some are used to specify other
+aspects of the compilation environment.
 
 @ifclear INTERNALS
 Note that you can also specify places to search using options such as
@@ -6065,6 +6066,28 @@ which case the Make rules are written to that file, guessing the target
 name from the source file name.  Or the value can have the form
 @samp{@var{file} @var{target}}, in which case the rules are written to
 file @var{file} using @var{target} as the target name.
+
+@item LANG
+@findex LANG
+@cindex locale definition
+This variable is used to pass locale information to the compiler. One way in
+which this information is used is to determine the character set to be used
+when character literals, string literals and comments are parsed in C and C++.
+When the compiler is configured to allow multibyte characters,
+the following values for @code{LANG} are recognized:
+
+@table @code
+@item C-JIS
+Recognize JIS characters.
+@item C-SJIS
+Recognize SJIS characters.
+@item C-EUCJP
+Recognize EUCJP characters.
+@end table
+
+If @code{LANG} is not defined, or if it has some other value, then the
+compiler will use @code{mblen} and @code{mbtowc} as defined by the default
+locale to recognize and translate multibyte characters.
 @end table
 
 @node Running Protoize
diff --git a/gcc/mbchar.c b/gcc/mbchar.c
new file mode 100644
index 0000000..d54a497
--- /dev/null
+++ b/gcc/mbchar.c
@@ -0,0 +1,288 @@
+/* Multibyte Character Functions.
+   Copyright (C) 1998 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* These functions are used to manipulate multibyte characters.  */
+
+/* Note regarding cross compilation:
+
+   In general translation of multibyte characters to wide characters can
+   only work in a native compiler since the translation function (mbtowc)
+   needs to know about both the source and target character encoding.  However,
+   this particular implementation for JIS, SJIS and EUCJP source characters
+   will work for any compiler with a newlib target.  Other targets may also
+   work provided that their wchar_t implementation is 2 bytes and the encoding
+   leaves the source character values unchanged (except for removing the
+   state shifting markers).  */
+
+#ifdef MULTIBYTE_CHARS
+#include "config.h"
+#include "system.h"
+#include "gansidecl.h"
+#include "mbchar.h"
+#include <locale.h>
+
+typedef enum  /* Class of an input byte; column index into the JIS tables.  */
+{
+  ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER, JIS_C_NUM
+} JIS_CHAR_TYPE;  /* JIS_C_NUM sizes the tables' second dimension.  */
+
+typedef enum  /* Decoder state; row index into the JIS tables.  */
+{
+  ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
+  J2_ESC, J2_ESC_BR, INV, JIS_S_NUM
+} JIS_STATE;  /* JIS_S_NUM sizes the tables' first dimension.  */
+
+typedef enum  /* What local_mbtowc does after classifying one input byte.  */
+{
+  COPYA, COPYJ, COPYJ2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR
+} JIS_ACTION;
+
+/*****************************************************************************
+ * state/action tables for processing JIS encoding
+ * Where possible, switches to JIS are grouped with following JIS characters
+ * and switches to ASCII are grouped with preceding JIS characters.
+ * Thus, maximum returned length is:
+ *   3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.
+ *****************************************************************************/
+static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {  /* next state */
+/*            ESCAPE DOLLAR   BRACKET   AT     B      J     NUL JIS_CHAR OTHER*/
+/*ASCII*/   { A_ESC, ASCII,   ASCII,    ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
+/*A_ESC*/   { ASCII, A_ESC_DL,ASCII,    ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
+/*A_ESC_DL*/{ ASCII, ASCII,   ASCII,    JIS,   JIS,   ASCII, ASCII,ASCII,ASCII},
+/*JIS*/     { J_ESC, JIS_1,   JIS_1,    JIS_1, JIS_1, JIS_1, INV,  JIS_1,INV },
+/*JIS_1*/   { INV,   JIS_2,   JIS_2,    JIS_2, JIS_2, JIS_2, INV,  JIS_2,INV },
+/*JIS_2*/   { J2_ESC,JIS,     JIS,      JIS,   JIS,   JIS,   INV,  JIS,  JIS },
+/*J_ESC*/   { INV,   INV,     J_ESC_BR, INV,   INV,   INV,   INV,  INV,  INV },
+/*J_ESC_BR*/{ INV,   INV,     INV,      INV,   ASCII, ASCII, INV,  INV,  INV },
+/*J2_ESC*/  { INV,   INV,     J2_ESC_BR,INV,   INV,   INV,   INV,  INV,  INV },
+/*J2_ESC_BR*/{INV,   INV,     INV,      INV,   ASCII, ASCII, INV,  INV,  INV },
+};
+
+static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {  /* action taken */
+/*            ESCAPE DOLLAR BRACKET AT     B       J      NUL  JIS_CHAR OTHER */
+/*ASCII */   {NOOP,  COPYA, COPYA, COPYA,  COPYA,  COPYA, EMPTY, COPYA, COPYA},
+/*A_ESC */   {COPYA, NOOP,  COPYA, COPYA,  COPYA,  COPYA, COPYA, COPYA, COPYA},
+/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
+/*JIS */     {NOOP,  NOOP,  NOOP,  NOOP,   NOOP,   NOOP,  ERROR, NOOP,  ERROR },
+/*JIS_1 */   {ERROR, NOOP,  NOOP,  NOOP,   NOOP,   NOOP,  ERROR, NOOP,  ERROR },
+/*JIS_2 */   {NOOP,  COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
+/*J_ESC */   {ERROR, ERROR, NOOP,  ERROR,  ERROR,  ERROR, ERROR, ERROR, ERROR },
+/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR,  NOOP,   NOOP,  ERROR, ERROR, ERROR },
+/*J2_ESC */  {ERROR, ERROR, NOOP,  ERROR,  ERROR,  ERROR, ERROR, ERROR, ERROR },
+/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR,  COPYJ,  COPYJ, ERROR, ERROR, ERROR },
+};
+
+
+char *literal_codeset = NULL;
+
+/* Convert the multibyte character at S (at most N bytes) into *PWC (if PWC
+   is non-null) using the encoding named by literal_codeset.  Return the
+   number of bytes consumed, or -1 for an invalid or truncated sequence.
+   If S is null, return nonzero iff the encoding is state-dependent.  */
+int
+local_mbtowc (pwc, s, n)
+     wchar_t       *pwc;
+     const char    *s;
+     size_t         n;
+{
+  static JIS_STATE save_state = ASCII;  /* JIS shift state kept across calls */
+  JIS_STATE curr_state = save_state;
+  unsigned char *t = (unsigned char *)s;
+
+  if (s != NULL && n == 0)
+    return -1;
+
+  if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
+    ;  /* This must be the "C" locale or unknown locale -- fall thru */
+  else if (! strcmp (literal_codeset, "C-SJIS"))
+    {
+      int char1;
+      if (s == NULL)
+        return 0;  /* not state-dependent */
+      char1 = *t;
+      if (ISSJIS1 (char1))
+        {
+          /* Check N before touching t[1]: it is out of bounds when N is 1.  */
+          if (n <= 1)
+            return -1;
+          if (ISSJIS2 (t[1]))
+            {
+	      if (pwc != NULL)
+		*pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+              return 2;
+            }
+	  return -1;
+        }
+      if (pwc != NULL)
+	*pwc = (wchar_t)*t;
+      if (*t == '\0')
+	return 0;
+      return 1;
+    }
+  else if (! strcmp (literal_codeset, "C-EUCJP"))
+    {
+      int char1;
+      if (s == NULL)
+        return 0;  /* not state-dependent */
+      char1 = *t;
+      if (ISEUCJP (char1))
+        {
+          /* Check N before touching t[1]: it is out of bounds when N is 1.  */
+          if (n <= 1)
+            return -1;
+          if (ISEUCJP (t[1]))
+            {
+	      if (pwc != NULL)
+		*pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+              return 2;
+            }
+	  return -1;
+        }
+      if (pwc != NULL)
+	*pwc = (wchar_t)*t;
+      if (*t == '\0')
+	return 0;
+      return 1;
+    }
+  else if (! strcmp (literal_codeset, "C-JIS"))
+    {
+      JIS_ACTION action;
+      JIS_CHAR_TYPE ch;
+      unsigned char *ptr;
+      int i, curr_ch;
+
+      if (s == NULL)
+	{
+	  save_state = ASCII;
+	  return 1;  /* state-dependent */
+	}
+
+      ptr = t;  /* first byte of the character's value; moved past escapes */
+      for (i = 0; i < n; ++i)
+        {
+          curr_ch = t[i];
+          switch (curr_ch)
+            {
+	    case JIS_ESC_CHAR:
+              ch = ESCAPE;
+              break;
+	    case '$':
+              ch = DOLLAR;
+              break;
+            case '@':
+              ch = AT;
+              break;
+            case '(':
+	      ch = BRACKET;
+              break;
+            case 'B':
+              ch = B;
+              break;
+            case 'J':
+              ch = J;
+              break;
+            case '\0':
+              ch = NUL;
+              break;
+            default:
+              if (ISJIS (curr_ch))
+                ch = JIS_CHAR;
+              else
+                ch = OTHER;
+	    }
+          /* Fetch the action first: both lookups are keyed on the old state.  */
+          action = JIS_action_table[curr_state][ch];
+          curr_state = JIS_state_table[curr_state][ch];
+
+          switch (action)
+            {
+            case NOOP:
+              break;
+            case EMPTY:
+	      if (pwc != NULL)
+		*pwc = (wchar_t)0;
+	      save_state = curr_state;
+              return i;
+            case COPYA:
+	      if (pwc != NULL)
+		*pwc = (wchar_t)*ptr;
+	      save_state = curr_state;
+              return (i + 1);
+            case COPYJ:
+	      if (pwc != NULL)
+		*pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+	      save_state = curr_state;
+              return (i + 1);
+            case COPYJ2:
+	      if (pwc != NULL)
+		*pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+	      save_state = curr_state;
+              return (ptr - t) + 2;
+            case MAKE_A:
+            case MAKE_J:
+              ptr = t + i + 1;  /* value bytes start after the escape sequence */
+              break;
+            case ERROR:
+            default:
+              return -1;
+            }
+        }
+
+      return -1;  /* n < bytes needed */
+    }
+#ifdef CROSS_COMPILE
+  if (s == NULL)
+    return 0;  /* not state-dependent */
+  if (pwc != NULL)
+    *pwc = (wchar_t)*t;  /* via unsigned char: bytes >= 0x80 not sign-extended */
+  return 1;
+#else
+  /* This must be the "C" locale or unknown locale. */
+  return mbtowc (pwc, s, n);
+#endif
+}
+
+int
+local_mblen (s, n)
+     const char    *s;  /* multibyte sequence to measure; may be null */
+     size_t         n;  /* maximum number of bytes of S to examine */
+{
+  return local_mbtowc (NULL, s, n);  /* same scan, no wide character stored */
+}
+
+int
+local_mb_cur_max ()
+{
+  /* Longest multibyte character, in bytes, for literal_codeset.  A missing
+     or one-character codeset name falls through to the host default.  */
+  int named = literal_codeset != NULL && strlen (literal_codeset) > 1;
+
+  if (named && (strcmp (literal_codeset, "C-SJIS") == 0
+		|| strcmp (literal_codeset, "C-EUCJP") == 0))
+    return 2;
+  if (named && strcmp (literal_codeset, "C-JIS") == 0)
+    return 8; /* 3 + 2 + 3 */
+#ifdef CROSS_COMPILE
+  return 1;
+#else
+  return MB_CUR_MAX;
+#endif
+}
+#endif /* MULTIBYTE_CHARS */
diff --git a/gcc/mbchar.h b/gcc/mbchar.h
new file mode 100644
index 0000000..a4b82c0
--- /dev/null
+++ b/gcc/mbchar.h
@@ -0,0 +1,25 @@
+/* mbchar.h - Various declarations for functions found in mbchar.c
+   Copyright (C) 1998 Free Software Foundation, Inc.
+ */
+
+#ifndef __GCC_MBCHAR_H__
+#define __GCC_MBCHAR_H__
+
+#ifdef MULTIBYTE_CHARS
+/* Escape character used for JIS encoding.  */
+#define JIS_ESC_CHAR 0x1b
+/* Byte-range tests.  C is evaluated more than once: no side effects.  */
+#define ISSJIS1(c)   (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xef))
+#define ISSJIS2(c)   (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
+#define ISEUCJP(c)   ((c) >= 0xa1 && (c) <= 0xfe)
+#define ISJIS(c)     ((c) >= 0x21 && (c) <= 0x7e)
+
+int local_mbtowc     PROTO ((wchar_t *, const char *, size_t));
+int local_mblen      PROTO ((const char *, size_t));
+int local_mb_cur_max PROTO ((void));
+
+/* The locale being used for multibyte characters in string/char literals.  */
+extern char *literal_codeset;
+#endif /* MULTIBYTE_CHARS */
+
+#endif /* __GCC_MBCHAR_H__ */
author	Dave Brolley <brolley@cygnus.com>	1998-07-20 13:35:38 +0000
committer	Dave Brolley <brolley@gcc.gnu.org>	1998-07-20 09:35:38 -0400
commit	56f48ce9765aa2b6d4742a4923fee581a12c1418 (patch)
tree	671dda9b98d314335b4825d12e41d06427ebd8ce
parent	689fcba8611f93ce241fa090a0423e8d35324027 (diff)
download	gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.zip gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.tar.gz gcc-56f48ce9765aa2b6d4742a4923fee581a12c1418.tar.bz2