8 files changed, 149 insertions, 104 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 144f744..ba33bc4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2002-05-04  Neil Booth  <neil@daikokuya.demon.co.uk>
+
+	* c-lex.c (lex_string): Let cpp_parse_escape handle truncation
+	and sign-extension.
+	(lex_charconst): Update for change in prototype of
+	cpp_interpret_charconst.  Extend from cppchar_t to HOST_WIDE_INT
+	appropriately.
+	* cpphash.h (BITS_PER_CPPCHAR_T): New.
+	* cppinit.c (cpp_create_reader): Initialize the new precision
+	members for no change in semantics.
+	(cpp_post_options): Add sanity checks.
+	* cpplex.c (cpp_parse_escape): Handle precision, sign-extension
+	and truncation issues.  Calculate in type cppchar_t.
+	(MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove.
+	(cpp_interpret_charconst): Calculate in type cppchar_t.  Handle
+	run-time dependent precision correctly.  Return whether the
+	result is signed or not.
+	* cpplib.c (dequote_string): Use cppchar_t; update.
+	* cpplib.h (cppchar_signed_t): New.
+	(struct cpp_options): New precision members.
+	(cpp_interpret_charconst, cpp_parse_escape): Update prototypes.
+
 2002-05-03  David S. Miller  <davem@redhat.com>
 
 	* config/sparc/sparc-protos.h (sparc_rtx_costs): New.
diff --git a/gcc/c-lex.c b/gcc/c-lex.c
index acdcf34..0c10f30 100644
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -1238,9 +1238,7 @@ lex_string (str, len, wide)
   char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
   char *q = buf;
   const unsigned char *p = str, *limit = str + len;
-  unsigned int c;
-  unsigned width = wide ? WCHAR_TYPE_SIZE
-			: TYPE_PRECISION (char_type_node);
+  cppchar_t c;
 
 #ifdef MULTIBYTE_CHARS
   /* Reset multibyte conversion state.  */
@@ -1270,15 +1268,7 @@ lex_string (str, len, wide)
 #endif
 
       if (c == '\\' && !ignore_escape_flag)
-	{
-	  unsigned int mask;
-
-	  if (width < HOST_BITS_PER_INT)
-	    mask = ((unsigned int) 1 << width) - 1;
-	  else
-	    mask = ~0;
-	  c = cpp_parse_escape (parse_in, &p, limit, mask);
-	}
+	c = cpp_parse_escape (parse_in, &p, limit, wide);
 	
       /* Add this single character into the buffer either as a wchar_t,
 	 a multibyte sequence, or as a single byte.  */
@@ -1345,45 +1335,31 @@ static tree
 lex_charconst (token)
      const cpp_token *token;
 {
-  HOST_WIDE_INT result;
+  cppchar_t result;
   tree type, value;
   unsigned int chars_seen;
+  int unsignedp;
  
   result = cpp_interpret_charconst (parse_in, token, warn_multichar,
- 				    &chars_seen);
-  if (token->type == CPP_WCHAR)
-    {
-      value = build_int_2 (result, 0);
-      type = wchar_type_node;
-    }
-  else
-    {
-      if (result < 0)
- 	value = build_int_2 (result, -1);
-      else
- 	value = build_int_2 (result, 0);
- 
-      /* In C, a character constant has type 'int'.
- 	 In C++ 'char', but multi-char charconsts have type 'int'.  */
-      if (c_language == clk_cplusplus && chars_seen <= 1)
-	type = char_type_node;
-      else
-	type = integer_type_node;
-    }
+ 				    &chars_seen, &unsignedp);
 
-  /* cpp_interpret_charconst issues a warning if the constant
-     overflows, but if the number fits in HOST_WIDE_INT anyway, it
-     will return it un-truncated, which may cause problems down the
-     line.  So set the type to widest_integer_literal_type, call
-     convert to truncate it to the proper type, then clear
-     TREE_OVERFLOW so we don't get a second warning.
-
-     FIXME: cpplib's assessment of overflow may not be accurate on a
-     platform where the final type can change at (compiler's) runtime.  */
+  /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
+     before possibly widening to HOST_WIDE_INT for build_int_2.  */
+  if (unsignedp || (cppchar_signed_t) result >= 0)
+    value = build_int_2 (result, 0);
+  else
+    value = build_int_2 ((cppchar_signed_t) result, -1);
 
-  TREE_TYPE (value) = widest_integer_literal_type_node;
-  value = convert (type, value);
-  TREE_OVERFLOW (value) = 0;
+  if (token->type == CPP_WCHAR)
+    type = wchar_type_node;
+  /* In C, a character constant has type 'int'.
+     In C++ 'char', but multi-char charconsts have type 'int'.  */
+  else if ((c_language == clk_c || c_language == clk_objective_c)
+	   || chars_seen > 1)
+    type = integer_type_node;
+  else
+    type = char_type_node;
 
+  TREE_TYPE (value) = type;
   return value;
 }
diff --git a/gcc/cppexp.c b/gcc/cppexp.c
index 914a207..b71b02a 100644
--- a/gcc/cppexp.c
+++ b/gcc/cppexp.c
@@ -283,10 +283,10 @@ eval_token (pfile, token)
      const cpp_token *token;
 {
   unsigned int temp;
+  int unsignedp = 0;
   struct op op;
 
   op.op = CPP_NUMBER;
-  op.unsignedp = 0;
 
   switch (token->type)
     {
@@ -294,9 +294,8 @@ eval_token (pfile, token)
       return parse_number (pfile, token);
 
     case CPP_WCHAR:
-      op.unsignedp = WCHAR_UNSIGNED;
-    case CPP_CHAR:		/* Always unsigned.  */
-      op.value = cpp_interpret_charconst (pfile, token, 1, &temp);
+    case CPP_CHAR:
+      op.value = cpp_interpret_charconst (pfile, token, 1, &temp, &unsignedp);
       break;
 
     case CPP_NAME:
@@ -331,6 +330,7 @@ eval_token (pfile, token)
       op.value = temp;
     }
 
+  op.unsignedp = unsignedp;
   return op;
 }
 
diff --git a/gcc/cpphash.h b/gcc/cpphash.h
index 5ad0c6e..7baf8ff 100644
--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -29,6 +29,8 @@ struct directive;		/* Deliberately incomplete.  */
 struct pending_option;
 struct op;
 
+#define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))
+
 /* Test if a sign is valid within a preprocessing number.  */
 #define VALID_SIGN(c, prevc) \
   (((c) == '+' || (c) == '-') && \
diff --git a/gcc/cppinit.c b/gcc/cppinit.c
index cee7571..cb5b263 100644
--- a/gcc/cppinit.c
+++ b/gcc/cppinit.c
@@ -502,6 +502,18 @@ cpp_create_reader (lang)
   CPP_OPTION (pfile, pending) =
     (struct cpp_pending *) xcalloc (1, sizeof (struct cpp_pending));
 
+  /* CPP arithmetic done to existing rules for now.  */
+#define BITS_PER_HOST_WIDEST_INT (CHAR_BIT * sizeof (HOST_WIDEST_INT))
+  CPP_OPTION (pfile, precision) = BITS_PER_HOST_WIDEST_INT;
+#ifndef MAX_CHAR_TYPE_SIZE
+#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
+#endif
+  CPP_OPTION (pfile, char_precision) = MAX_CHAR_TYPE_SIZE;
+#ifndef MAX_WCHAR_TYPE_SIZE
+#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
+#endif
+  CPP_OPTION (pfile, wchar_precision) = MAX_WCHAR_TYPE_SIZE;
+
   /* It's simplest to just create this struct whether or not it will
      be needed.  */
   pfile->deps = deps_init ();
@@ -1796,6 +1808,27 @@ cpp_post_options (pfile)
       fputc ('\n', stderr);
     }
 
+#if ENABLE_CHECKING
+  /* Sanity checks for CPP arithmetic.  */
+  if (CPP_OPTION (pfile, precision) > BITS_PER_HOST_WIDEST_INT)
+    cpp_error (pfile, DL_FATAL,
+	       "preprocessor arithmetic has maximum precision of %u bits; target requires %u bits",
+	       BITS_PER_HOST_WIDEST_INT, CPP_OPTION (pfile, precision));
+
+  if (CPP_OPTION (pfile, char_precision) > BITS_PER_CPPCHAR_T
+      || CPP_OPTION (pfile, wchar_precision) > BITS_PER_CPPCHAR_T)
+    cpp_error (pfile, DL_FATAL,
+	       "CPP cannot handle (wide) character constants over %u bits",
+	       BITS_PER_CPPCHAR_T);
+
+  {
+    cppchar_t test = 0;
+    test--;
+    if (test < 1)
+      cpp_error (pfile, DL_FATAL, "cppchar_t must be an unsigned type");
+  }
+#endif
+
   /* Canonicalize in_fname and out_fname.  We guarantee they are not
      NULL, and that the empty string represents stdin / stdout.  */
   if (CPP_OPTION (pfile, in_fname) == NULL
diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index bc12978..0a26049 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -1710,23 +1710,33 @@ maybe_read_ucs (pfile, pstr, limit, pc)
   return 0;
 }
 
-/* Interpret an escape sequence, and return its value.  PSTR points to
-   the input pointer, which is just after the backslash.  LIMIT is how
-   much text we have.  MASK is a bitmask for the precision for the
-   destination type (char or wchar_t).
-
-   Handles all relevant diagnostics.  */
-unsigned int
-cpp_parse_escape (pfile, pstr, limit, mask)
+/* Returns the value of an escape sequence, truncated to the correct
+   target precision.  PSTR points to the input pointer, which is just
+   after the backslash.  LIMIT is how much text we have.  WIDE is true
+   if the escape sequence is part of a wide character constant or
+   string literal.  Handles all relevant diagnostics.  */
+cppchar_t
+cpp_parse_escape (pfile, pstr, limit, wide)
      cpp_reader *pfile;
      const unsigned char **pstr;
      const unsigned char *limit;
-     unsigned HOST_WIDE_INT mask;
+     int wide;
 {
   int unknown = 0;
   const unsigned char *str = *pstr;
-  unsigned int c = *str++;
+  cppchar_t c, mask;
+  unsigned int width;
+
+  if (wide)
+    width = CPP_OPTION (pfile, wchar_precision);
+  else
+    width = CPP_OPTION (pfile, char_precision);
+  if (width < BITS_PER_CPPCHAR_T)
+    mask = ((cppchar_t) 1 << width) - 1;
+  else
+    mask = ~0;
 
+  c = *str++;
   switch (c)
     {
     case '\\': case '\'': case '"': case '?': break;
@@ -1767,7 +1777,7 @@ cpp_parse_escape (pfile, pstr, limit, mask)
 		   "the meaning of '\\x' is different in traditional C");
 
 	{
-	  unsigned int i = 0, overflow = 0;
+	  cppchar_t i = 0, overflow = 0;
 	  int digits_found = 0;
 
 	  while (str < limit)
@@ -1798,8 +1808,8 @@ cpp_parse_escape (pfile, pstr, limit, mask)
     case '0':  case '1':  case '2':  case '3':
     case '4':  case '5':  case '6':  case '7':
       {
-	unsigned int i = c - '0';
-	int count = 0;
+	size_t count = 0;
+	cppchar_t i = c - '0';
 
 	while (str < limit && ++count < 3)
 	  {
@@ -1834,36 +1844,33 @@ cpp_parse_escape (pfile, pstr, limit, mask)
     }
 
   if (c > mask)
-    cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
+    {
+      cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
+      c &= mask;
+    }
 
   *pstr = str;
   return c;
 }
 
-#ifndef MAX_CHAR_TYPE_SIZE
-#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
-#endif
-
-#ifndef MAX_WCHAR_TYPE_SIZE
-#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
-#endif
-
 /* Interpret a (possibly wide) character constant in TOKEN.
-   WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN points
-   to a variable that is filled in with the number of characters seen.  */
-HOST_WIDE_INT
-cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
+   WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
+   points to a variable that is filled in with the number of
+   characters seen, and UNSIGNEDP to a variable that indicates whether
+   the result has unsigned type.  */
+cppchar_t
+cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen, unsignedp)
      cpp_reader *pfile;
      const cpp_token *token;
      int warn_multi;
      unsigned int *pchars_seen;
+     int *unsignedp;
 {
   const unsigned char *str = token->val.str.text;
   const unsigned char *limit = str + token->val.str.len;
   unsigned int chars_seen = 0;
-  unsigned int width, max_chars, c;
-  unsigned HOST_WIDE_INT mask;
-  HOST_WIDE_INT result = 0;
+  unsigned int width, max_chars;
+  cppchar_t c, mask, result = 0;
   bool unsigned_p;
 
 #ifdef MULTIBYTE_CHARS
@@ -1873,20 +1880,20 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
   /* Width in bits.  */
   if (token->type == CPP_CHAR)
     {
-      width = MAX_CHAR_TYPE_SIZE;
+      width = CPP_OPTION (pfile, char_precision);
       unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
     }
   else
     {
-      width = MAX_WCHAR_TYPE_SIZE;
+      width = CPP_OPTION (pfile, wchar_precision);
       unsigned_p = WCHAR_UNSIGNED;
     }
 
-  if (width < HOST_BITS_PER_WIDE_INT)
-    mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
+  if (width < BITS_PER_CPPCHAR_T)
+    mask = ((cppchar_t) 1 << width) - 1;
   else
     mask = ~0;
-  max_chars = HOST_BITS_PER_WIDE_INT / width;
+  max_chars = BITS_PER_CPPCHAR_T / width;
 
   while (str < limit)
     {
@@ -1911,7 +1918,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
 #endif
 
       if (c == '\\')
-	c = cpp_parse_escape (pfile, &str, limit, mask);
+	c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
 
 #ifdef MAP_CHARACTER
       if (ISPRINT (c))
@@ -1921,7 +1928,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
       /* Merge character into result; ignore excess chars.  */
       if (++chars_seen <= max_chars)
 	{
-	  if (width < HOST_BITS_PER_WIDE_INT)
+	  if (width < BITS_PER_CPPCHAR_T)
 	    result = (result << width) | (c & mask);
 	  else
 	    result = c;
@@ -1943,7 +1950,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
     {
       unsigned int nbits = chars_seen * width;
 
-      mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
+      mask = (cppchar_t) ~0 >> (BITS_PER_CPPCHAR_T - nbits);
       if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
 	result &= mask;
       else
@@ -1951,6 +1958,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
     }
 
   *pchars_seen = chars_seen;
+  *unsignedp = unsigned_p;
   return result;
 }
 
diff --git a/gcc/cpplib.c b/gcc/cpplib.c
index b210209..c90224c 100644
--- a/gcc/cpplib.c
+++ b/gcc/cpplib.c
@@ -726,23 +726,15 @@ dequote_string (pfile, str, len)
   uchar *result = _cpp_unaligned_alloc (pfile, len + 1);
   uchar *dst = result;
   const uchar *limit = str + len;
-  unsigned int c;
-  unsigned HOST_WIDE_INT mask;
+  cppchar_t c;
 
-  /* We need the mask to match the host's 'unsigned char', not the
-     target's.  */
-  if (CHAR_BIT < HOST_BITS_PER_WIDE_INT)
-    mask = ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1;
-  else
-    mask = ~(unsigned HOST_WIDE_INT)0;
-  
   while (str < limit)
     {
       c = *str++;
       if (c != '\\')
 	*dst++ = c;
       else
-	*dst++ = cpp_parse_escape (pfile, (const uchar **)&str, limit, mask);
+	*dst++ = cpp_parse_escape (pfile, &str, limit, 0);
     }
   *dst++ = '\0';
   return result;
diff --git a/gcc/cpplib.h b/gcc/cpplib.h
index bbf272b..520f2a2 100644
--- a/gcc/cpplib.h
+++ b/gcc/cpplib.h
@@ -190,9 +190,12 @@ struct cpp_token
   } val;
 };
 
-/* A standalone character.  It is unsigned for the same reason we use
-   unsigned char - to avoid signedness issues.  */
+/* A type wide enough to hold any multibyte source character.
+   cpplib's character constant interpreter uses shifts, and so
+   requires an unsigned type.  */
 typedef unsigned int cppchar_t;
+/* Its signed equivalent.  */
+typedef int cppchar_signed_t;
 
 /* Values for opts.dump_macros.
   dump_only means inhibit output of the preprocessed text
@@ -237,6 +240,10 @@ struct cpp_options
   /* -fleading_underscore sets this to "_".  */
   const char *user_label_prefix;
 
+  /* Precision for target CPP arithmetic, target characters and target
+     wide characters, respectively.  */
+  size_t precision, char_precision, wchar_precision;
+
   /* The language we're preprocessing.  */
   enum c_lang lang;
 
@@ -535,9 +542,9 @@ extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
 extern void _cpp_backup_tokens PARAMS ((cpp_reader *, unsigned int));
 
 /* Evaluate a CPP_CHAR or CPP_WCHAR token.  */
-extern HOST_WIDE_INT
+extern cppchar_t
 cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *,
-				 int, unsigned int *));
+				 int, unsigned int *, int *));
 
 extern void cpp_define PARAMS ((cpp_reader *, const char *));
 extern void cpp_assert PARAMS ((cpp_reader *, const char *));
@@ -600,10 +607,15 @@ extern int cpp_ideq			PARAMS ((const cpp_token *,
 extern void cpp_output_line		PARAMS ((cpp_reader *, FILE *));
 extern void cpp_output_token		PARAMS ((const cpp_token *, FILE *));
 extern const char *cpp_type2name	PARAMS ((enum cpp_ttype));
-extern unsigned int cpp_parse_escape	PARAMS ((cpp_reader *,
-						 const unsigned char **,
-						 const unsigned char *,
-						 unsigned HOST_WIDE_INT));
+/* Returns the value of an escape sequence, truncated to the correct
+   target precision.  PSTR points to the input pointer, which is just
+   after the backslash.  LIMIT is how much text we have.  WIDE is true
+   if the escape sequence is part of a wide character constant or
+   string literal.  Handles all relevant diagnostics.  */
+extern cppchar_t cpp_parse_escape	PARAMS ((cpp_reader *,
+						 const unsigned char ** pstr,
+						 const unsigned char *limit,
+						 int wide));
 
 /* In cpphash.c */