From f8710242d38de963d4d8204af1058cef8c05472b Mon Sep 17 00:00:00 2001
From: Neil Booth <neil@daikokuya.demon.co.uk>
Date: Wed, 23 May 2001 22:50:28 +0000
Subject: cpp.texi: Update for handling of charconsts.

	* cpp.texi: Update for handling of charconsts.
	* cpplex.c (maybe_read_ucs): Don't accept D800-DFFF.  Update
	diagnostics.  Skip to the end if the UCS is too short.
	(cpp_interpret_charconst): Long charconsts now issue a warning,
	not an error.

	* gcc.dg/cpp/charconst.c: New tests.
	* gcc.dg/cpp/escape.c: New tests.
	* gcc.dg/cpp/escape-1.c: New tests.
	* gcc.dg/cpp/escape-2.c: New tests.
	* gcc.dg/cpp/ucs.c: New tests.

From-SVN: r42514
---
 gcc/cpplex.c | 66 +++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 34 insertions(+), 32 deletions(-)

(limited to 'gcc/cpplex.c')

diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index d261749..3185acc 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -1706,46 +1706,48 @@ maybe_read_ucs (pfile, pstr, limit, pc)
 
   if (CPP_WTRADITIONAL (pfile))
     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
-  
-  for (length = (c == 'u' ? 4: 8); length; --length)
-    {
-      if (p >= limit)
-	{
-	  cpp_error (pfile, "incomplete universal-character-name");
-	  break;
-	}
 
-      c = *p;
-      if (ISXDIGIT (c))
-	{
-	  code = (code << 4) + hex_digit_value (c);
-	  p++;
-	}
-      else
+  length = (c == 'u' ? 4: 8);
+
+  if ((size_t) (limit - p) < length)
+    {
+      cpp_error (pfile, "incomplete universal-character-name");
+      /* Skip to the end to avoid more diagnostics.  */
+      p = limit;
+    }
+  else
+    {
+      for (; length; length--, p++)
 	{
-	  cpp_error (pfile,
-		     "non-hex digit '%c' in universal-character-name", c);
-	  break;
+	  c = *p;
+	  if (ISXDIGIT (c))
+	    code = (code << 4) + hex_digit_value (c);
+	  else
+	    {
+	      cpp_error (pfile,
+			 "non-hex digit '%c' in universal-character-name", c);
+	      /* We shouldn't skip in case there are multibyte chars.  */
+	      break;
+	    }
 	}
-
     }
 
 #ifdef TARGET_EBCDIC
   cpp_error (pfile, "universal-character-name on EBCDIC target");
   code = 0x3f;  /* EBCDIC invalid character */
 #else
-  if (code > 0x9f && !(code & 0x80000000))
-    ; /* True extended character, OK.  */
-  else if (code >= 0x20 && code < 0x7f)
-    {
-      /* ASCII printable character.  The C character set consists of all of
-	 these except $, @ and `.  We use hex escapes so that this also
-	 works with EBCDIC hosts.  */
-      if (code != 0x24 && code != 0x40 && code != 0x60)
-	cpp_error (pfile, "universal-character-name used for '%c'", code);
-    }
-  else
-    cpp_error (pfile, "invalid universal-character-name");
+ /* True extended characters are OK.  */
+  if (code >= 0xa0
+      && !(code & 0x80000000)
+      && !(code >= 0xD800 && code <= 0xDFFF))
+    ;
+  /* The standard permits $, @ and ` to be specified as UCNs.  We use
+     hex escapes so that this also works with EBCDIC hosts.  */
+  else if (code == 0x24 || code == 0x40 || code == 0x60)
+    ;
+  /* Don't give another error if one occurred above.  */
+  else if (length == 0)
+    cpp_error (pfile, "universal-character-name out of range");
 #endif
 
   *pstr = p;
@@ -1970,7 +1972,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
   else if (chars_seen > max_chars)
     {
       chars_seen = max_chars;
-      cpp_error (pfile, "character constant too long");
+      cpp_warning (pfile, "character constant too long");
     }
   else if (chars_seen > 1 && !traditional && warn_multi)
     cpp_warning (pfile, "multi-character character constant");
-- 
cgit v1.1