libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]

The following patch implements the C++23 P2290R3 paper. 2022-08-20 Jakub Jelinek <jakub@redhat.com> PR c++/106645 libcpp/ * include/cpplib.h (struct cpp_options): Implement P2290R3 - Delimited escape sequences. Add delimite_escape_seqs member. * init.cc (struct lang_flags): Likewise. (lang_defaults): Add delim column. (cpp_set_lang): Copy over delimite_escape_seqs. * charset.cc (extend_char_range): New function. (_cpp_valid_ucn): Use it. Handle delimited escape sequences. (convert_hex): Likewise. (convert_oct): Likewise. (convert_ucn): Use extend_char_range. (convert_escape): Call convert_oct even for \o. (_cpp_interpret_identifier): Handle delimited escape sequences. * lex.cc (get_bidi_ucn_1): Likewise. Add end argument, fill it in. (get_bidi_ucn): Adjust get_bidi_ucn_1 caller. Use end argument to compute num_bytes. gcc/testsuite/ * c-c++-common/cpp/delimited-escape-seq-1.c: New test. * c-c++-common/cpp/delimited-escape-seq-2.c: New test. * c-c++-common/cpp/delimited-escape-seq-3.c: New test. * c-c++-common/Wbidi-chars-24.c: New test. * gcc.dg/cpp/delimited-escape-seq-1.c: New test. * gcc.dg/cpp/delimited-escape-seq-2.c: New test. * g++.dg/cpp/delimited-escape-seq-1.C: New test. * g++.dg/cpp/delimited-escape-seq-2.C: New test.
author: Jakub Jelinek <jakub@redhat.com> 2022-08-20 10:26:55 +0200
committer: Jakub Jelinek <jakub@redhat.com> 2022-08-20 10:26:55 +0200
commit: e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8 (patch)
tree: 83db1180a30890d65465b55a54be1e8a30cc1534 /libcpp
parent: 613e9e16b85e209fad316deaef33cfaf7bd2bc98 (diff)
download: gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.zip
gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.tar.gz
gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.tar.bz2
4 files changed, 225 insertions, 50 deletions
diff --git a/libcpp/charset.cc b/libcpp/charset.cc
index 12e3163..cf4a525 100644
--- a/libcpp/charset.cc
+++ b/libcpp/charset.cc
@@ -1036,6 +1036,19 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
   return 1;
 }
 
+/* Increment char_range->m_finish by a single character.  */
+
+static void
+extend_char_range (source_range *char_range,
+		   cpp_string_location_reader *loc_reader)
+{
+  if (loc_reader)
+    {
+      gcc_assert (char_range);
+      char_range->m_finish = loc_reader->get_next ().m_finish;
+    }
+}
+
 /* [lex.charset]: The character designated by the universal character
    name \UNNNNNNNN is that character whose character short name in
    ISO/IEC 10646 is NNNNNNNN; the character designated by the
@@ -1081,6 +1094,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
   unsigned int length;
   const uchar *str = *pstr;
   const uchar *base = str - 2;
+  bool delimited = false;
 
   if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
     cpp_error (pfile, CPP_DL_WARNING,
@@ -1095,7 +1109,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
 	         (int) str[-1]);
 
   if (str[-1] == 'u')
-    length = 4;
+    {
+      length = 4;
+      if (str < limit && *str == '{')
+	{
+	  str++;
+	  /* Magic value to indicate no digits seen.  */
+	  length = 32;
+	  delimited = true;
+	  extend_char_range (char_range, loc_reader);
+	}
+    }
   else if (str[-1] == 'U')
     length = 8;
   else
@@ -1107,18 +1131,53 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
   result = 0;
   do
     {
+      if (str == limit)
+	break;
       c = *str;
       if (!ISXDIGIT (c))
 	break;
       str++;
-      if (loc_reader)
+      extend_char_range (char_range, loc_reader);
+      if (delimited)
 	{
-	  gcc_assert (char_range);
-	  char_range->m_finish = loc_reader->get_next ().m_finish;
+	  if (!result)
+	    /* Accept arbitrary number of leading zeros.
+	       16 is another magic value, smaller than 32 above
+	       and bigger than 8, so that upon encountering first
+	       non-zero digit we can count 8 digits and after that
+	       or in overflow bit and ensure length doesn't decrease
+	       to 0, as delimited escape sequence doesn't have upper
+	       bound on the number of hex digits.  */
+	    length = 16;
+	  else if (length == 16 - 8)
+	    {
+	      /* Make sure we detect overflows.  */
+	      result |= 0x8000000;
+	      ++length;
+	    }
 	}
+
       result = (result << 4) + hex_value (c);
     }
-  while (--length && str < limit);
+  while (--length);
+
+  if (delimited
+      && str < limit
+      && *str == '}'
+      && (length != 32 || !identifier_pos))
+    {
+      if (length == 32)
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "empty delimited escape sequence");
+      else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+	       && CPP_OPTION (pfile, cpp_pedantic))
+	cpp_error (pfile, CPP_DL_PEDWARN,
+		   "delimited escape sequences are only valid in C++23");
+      str++;
+      length = 0;
+      delimited = false;
+      extend_char_range (char_range, loc_reader);
+    }
 
   /* Partial UCNs are not valid in strings, but decompose into
      multiple tokens in identifiers, so we can't give a helpful
@@ -1132,9 +1191,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
   *pstr = str;
   if (length)
     {
-      cpp_error (pfile, CPP_DL_ERROR,
-		 "incomplete universal character name %.*s",
-		 (int) (str - base), base);
+      if (!delimited)
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "incomplete universal character name %.*s",
+		   (int) (str - base), base);
+      else
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "'\\u{' not terminated with '}' after %.*s",
+		   (int) (str - base), base);
       result = 1;
     }
   /* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
@@ -1212,9 +1276,8 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
 
   from++;  /* Skip u/U.  */
 
-  if (loc_reader)
-    /* The u/U is part of the spelling of this character.  */
-    char_range.m_finish = loc_reader->get_next ().m_finish;
+  /* The u/U is part of the spelling of this character.  */
+  extend_char_range (&char_range, loc_reader);
 
   _cpp_valid_ucn (pfile, &from, limit, 0, &nst,
 		  &ucn, &char_range, loc_reader);
@@ -1392,6 +1455,8 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
   int digits_found = 0;
   size_t width = cvt.width;
   size_t mask = width_to_mask (width);
+  bool delimited = false;
+  const uchar *base = from - 1;
 
   /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
   gcc_assert ((loc_reader != NULL) == (ranges != NULL));
@@ -1404,8 +1469,14 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
   from++;
 
   /* The 'x' is part of the spelling of this character.  */
-  if (loc_reader)
-    char_range.m_finish = loc_reader->get_next ().m_finish;
+  extend_char_range (&char_range, loc_reader);
+
+  if (from < limit && *from == '{')
+    {
+      delimited = true;
+      from++;
+      extend_char_range (&char_range, loc_reader);
+    }
 
   while (from < limit)
     {
@@ -1413,19 +1484,42 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
       if (! hex_p (c))
 	break;
       from++;
-      if (loc_reader)
-	char_range.m_finish = loc_reader->get_next ().m_finish;
+      extend_char_range (&char_range, loc_reader);
       overflow |= n ^ (n << 4 >> 4);
       n = (n << 4) + hex_value (c);
       digits_found = 1;
     }
 
+  if (delimited && from < limit && *from == '}')
+    {
+      from++;
+      if (!digits_found)
+	{
+	  cpp_error (pfile, CPP_DL_ERROR,
+		     "empty delimited escape sequence");
+	  return from;
+	}
+     else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+	      && CPP_OPTION (pfile, cpp_pedantic))
+	cpp_error (pfile, CPP_DL_PEDWARN,
+		   "delimited escape sequences are only valid in C++23");
+      delimited = false;
+      extend_char_range (&char_range, loc_reader);
+    }
+
   if (!digits_found)
     {
       cpp_error (pfile, CPP_DL_ERROR,
 		 "\\x used with no following hex digits");
       return from;
     }
+  else if (delimited)
+    {
+      cpp_error (pfile, CPP_DL_ERROR,
+		 "'\\x{' not terminated with '}' after %.*s",
+		 (int) (from - base), base);
+      return from;
+    }
 
   if (overflow | (n != (n & mask)))
     {
@@ -1459,25 +1553,71 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
 	     cpp_substring_ranges *ranges)
 {
   size_t count = 0;
-  cppchar_t c, n = 0;
+  cppchar_t c, n = 0, overflow = 0;
   size_t width = cvt.width;
   size_t mask = width_to_mask (width);
+  bool delimited = false;
+  const uchar *base = from - 1;
 
   /* loc_reader and ranges must either be both NULL, or both be non-NULL.  */
   gcc_assert ((loc_reader != NULL) == (ranges != NULL));
 
+  if (from < limit && *from == 'o')
+    {
+      from++;
+      extend_char_range (&char_range, loc_reader);
+      if (from == limit || *from != '{')
+	cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'");
+      else
+	{
+	  from++;
+	  extend_char_range (&char_range, loc_reader);
+	  delimited = true;
+	}
+    }
+
   while (from < limit && count++ < 3)
     {
       c = *from;
       if (c < '0' || c > '7')
 	break;
       from++;
-      if (loc_reader)
-	char_range.m_finish = loc_reader->get_next ().m_finish;
+      extend_char_range (&char_range, loc_reader);
+      if (delimited)
+	{
+	  count = 2;
+	  overflow |= n ^ (n << 3 >> 3);
+	}
       n = (n << 3) + c - '0';
     }
 
-  if (n != (n & mask))
+  if (delimited)
+    {
+      if (from < limit && *from == '}')
+	{
+	  from++;
+	  if (count == 1)
+	    {
+	      cpp_error (pfile, CPP_DL_ERROR,
+			 "empty delimited escape sequence");
+	      return from;
+	    }
+	  else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+		   && CPP_OPTION (pfile, cpp_pedantic))
+	    cpp_error (pfile, CPP_DL_PEDWARN,
+		       "delimited escape sequences are only valid in C++23");
+	  extend_char_range (&char_range, loc_reader);
+	}
+      else
+	{
+	  cpp_error (pfile, CPP_DL_ERROR,
+		     "'\\o{' not terminated with '}' after %.*s",
+		     (int) (from - base), base);
+	  return from;
+	}
+    }
+
+  if (overflow | (n != (n & mask)))
     {
       cpp_error (pfile, CPP_DL_PEDWARN,
 		 "octal escape sequence out of range");
@@ -1535,6 +1675,7 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
 
     case '0':  case '1':  case '2':  case '3':
     case '4':  case '5':  case '6':  case '7':
+    case 'o':
       return convert_oct (pfile, from, limit, tbuf, cvt,
 			  char_range, loc_reader, ranges);
 
@@ -2119,15 +2260,27 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
 	cppchar_t value = 0;
 	size_t bufleft = len - (bufp - buf);
 	int rval;
+	bool delimited = false;
 
 	idp += 2;
+	if (length == 4 && id[idp] == '{')
+	  {
+	    delimited = true;
+	    idp++;
+	  }
 	while (length && idp < len && ISXDIGIT (id[idp]))
 	  {
 	    value = (value << 4) + hex_value (id[idp]);
 	    idp++;
-	    length--;
+	    if (!delimited)
+	      length--;
 	  }
-	idp--;
+	if (!delimited)
+	  idp--;
+	/* else
+	     assert (id[idp] == '}');
+	   As the caller ensures it is a valid identifier, if it is
+	   delimited escape sequence, it must be terminated by }.  */
 
 	/* Special case for EBCDIC: if the identifier contains
 	   a '$' specified using a UCN, translate it to EBCDIC.  */
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 58ed51d..810203d 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -522,6 +522,9 @@ struct cpp_options
   /* Nonzero for C++23 size_t literals.  */
   unsigned char size_t_literals;
 
+  /* Nonzero for C++23 delimited escape sequences.  */
+  unsigned char delimited_escape_seqs;
+
   /* Holds the name of the target (execution) character set.  */
   const char *narrow_charset;
 
diff --git a/libcpp/init.cc b/libcpp/init.cc
index 8d99bdb..b184e1f 100644
--- a/libcpp/init.cc
+++ b/libcpp/init.cc
@@ -97,34 +97,35 @@ struct lang_flags
   char size_t_literals;
   char elifdef;
   char warning_directive;
+  char delimited_escape_seqs;
 };
 
 static const struct lang_flags lang_defaults[] =
-{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir */
-  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
-  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
-  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
-  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
-  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1,      1 },
-  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
-  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
-  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
-  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
-  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0 },
-  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1,      1 },
-  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
-  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
-  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0 },
-  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0 },
-  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0,      0 },
-  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0,      0 },
-  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
-  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0,      0 },
-  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
-  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0 },
-  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      0 },
-  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      0 },
-  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0,      0 }
+{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim */
+  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0 },
+  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0 },
+  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0 },
+  /* GNUC17   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0 },
+  /* GNUC2X   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    1,     1,     0,   1,      1,   1,     1,   0,   1,      1,      0 },
+  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0 },
+  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0 },
+  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0 },
+  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0 },
+  /* STDC17   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0,      0,   0,     0,   0,   0,      0,      0 },
+  /* STDC2X   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    1,     1,     1,   1,      0,   1,     1,   0,   1,      1,      0 },
+  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0 },
+  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0,      0 },
+  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0,      1,   1,     0,   0,   0,      0,      0 },
+  /* CXX11    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0,      0,   1,     0,   0,   0,      0,      0 },
+  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0,      1,   1,     0,   0,   0,      0,      0 },
+  /* CXX14    */  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0,      0,   1,     0,   0,   0,      0,      0 },
+  /* GNUCXX17 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0 },
+  /* CXX17    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      0,   1,     0,   0,   0,      0,      0 },
+  /* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0 },
+  /* CXX20    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   0,   0,      0,      0 },
+  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      0,      1 },
+  /* CXX23    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1,      1,   1,     0,   1,   1,      0,      1 },
+  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0,      0,   0,     0,   0,   0,      0,      0 }
 };
 
 /* Sets internal flags correctly for a given language.  */
@@ -155,6 +156,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
   CPP_OPTION (pfile, size_t_literals)		 = l->size_t_literals;
   CPP_OPTION (pfile, elifdef)			 = l->elifdef;
   CPP_OPTION (pfile, warning_directive)		 = l->warning_directive;
+  CPP_OPTION (pfile, delimited_escape_seqs)	 = l->delimited_escape_seqs;
 }
 
 /* Initialize library global state.  */
diff --git a/libcpp/lex.cc b/libcpp/lex.cc
index f891d3e..571cd2a 100644
--- a/libcpp/lex.cc
+++ b/libcpp/lex.cc
@@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
 /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
 
 static bidi::kind
-get_bidi_ucn_1 (const unsigned char *p, bool is_U)
+get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
 {
   /* 6.4.3 Universal Character Names
       \u hex-quad
       \U hex-quad hex-quad
+      \u { simple-hexadecimal-digit-sequence }
      where \unnnn means \U0000nnnn.  */
 
+  *end = p + 4;
   if (is_U)
     {
       if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
 	return bidi::kind::NONE;
       /* Skip 4B so we can treat \u and \U the same below.  */
       p += 4;
+      *end += 4;
+    }
+  else if (p[0] == '{')
+    {
+      p++;
+      while (*p == '0')
+	p++;
+      if (p[0] != '2'
+	  || p[1] != '0'
+	  || !ISXDIGIT (p[2])
+	  || !ISXDIGIT (p[3])
+	  || p[4] != '}')
+	return bidi::kind::NONE;
+      *end = p + 5;
     }
 
   /* All code points we are looking for start with 20xx.  */
@@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p, bool is_U)
    If the kind is not NONE, write the location to *OUT.*/
 
 static bidi::kind
-get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
+get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
 	      location_t *out)
 {
-  bidi::kind result = get_bidi_ucn_1 (p, is_U);
+  const unsigned char *end;
+  bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
   if (result != bidi::kind::NONE)
     {
       const unsigned char *start = p - 2;
-      size_t num_bytes = 2 + (is_U ? 8 : 4);
+      size_t num_bytes = end - start;
       *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
     }
   return result;
author	Jakub Jelinek <jakub@redhat.com>	2022-08-20 10:26:55 +0200
committer	Jakub Jelinek <jakub@redhat.com>	2022-08-20 10:26:55 +0200
commit	e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8 (patch)
tree	83db1180a30890d65465b55a54be1e8a30cc1534 /libcpp
parent	613e9e16b85e209fad316deaef33cfaf7bd2bc98 (diff)
download	gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.zip gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.tar.gz gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.tar.bz2