Support UTF-8 identifiers in C/C++ expressions (PR gdb/22973)

Factor cp_ident_is_alpha/cp_ident_is_alnum out of gdb/cp-name-parser.y and use them in the C/C++ expression parser too. New test included.

gdb/ChangeLog:
2018-05-22 Pedro Alves <palves@redhat.com>
           張俊芝 <zjz@zjz.name>

PR gdb/22973
* c-exp.y: Include "c-support.h".
(parse_number, c_parse_escape, lex_one_token): Use TOLOWER instead of tolower. Use c_ident_is_alpha to scan names.
* c-lang.c: Include "c-support.h".
(convert_ucn, convert_octal, convert_hex, convert_escape): Use ISXDIGIT instead of isxdigit and ISDIGIT instead of isdigit.
* c-support.h: New file, with bits factored out from ...
* cp-name-parser.y: ... this file. Include "c-support.h".
(cp_ident_is_alpha, cp_ident_is_alnum): Deleted, moved to c-support.h and renamed.
(symbol_end, yylex): Adjust.

gdb/testsuite/ChangeLog:
2018-05-22 Pedro Alves <palves@redhat.com>

PR gdb/22973
* gdb.base/utf8-identifiers.c: New file.
* gdb.base/utf8-identifiers.exp: New file.
author: Pedro Alves <palves@redhat.com> 2018-05-22 17:35:38 +0100
committer: Pedro Alves <palves@redhat.com> 2018-05-22 17:35:38 +0100
commit: b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51 (patch)
tree: 777bdbeaa10580f6d5a404ad2d9b86abf11da683 /gdb/cp-name-parser.y
parent: 0ec848ad25bb77edd9c9c3c097c3dd5b8874a6c0 (diff)
download: fsf-binutils-gdb-b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51.zip
fsf-binutils-gdb-b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51.tar.gz
fsf-binutils-gdb-b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51.tar.bz2
1 files changed, 4 insertions, 25 deletions
diff --git a/gdb/cp-name-parser.y b/gdb/cp-name-parser.y
index f522e46..ebae562 100644
--- a/gdb/cp-name-parser.y
+++ b/gdb/cp-name-parser.y
@@ -35,6 +35,7 @@
 #include "safe-ctype.h"
 #include "demangle.h"
 #include "cp-support.h"
+#include "c-support.h"
 
 /* Bison does not make it easy to create a parser without global
    state, unfortunately.  Here are all the global variables used
@@ -1304,28 +1305,6 @@ d_binary (const char *name, struct demangle_component *lhs, struct demangle_comp
 		      fill_comp (DEMANGLE_COMPONENT_BINARY_ARGS, lhs, rhs));
 }
 
-/* Like ISALPHA, but also returns true for the union of all UTF-8
-   multi-byte sequence bytes and non-ASCII characters in
-   extended-ASCII charsets (e.g., Latin1).  I.e., returns true if the
-   high bit is set.  Note that not all UTF-8 ranges are allowed in C++
-   identifiers, but we don't need to be pedantic so for simplicity we
-   ignore that here.  Plus this avoids the complication of actually
-   knowing what was the right encoding.  */
-
-static inline bool
-cp_ident_is_alpha (unsigned char ch)
-{
-  return ISALPHA (ch) || ch >= 0x80;
-}
-
-/* Similarly, but Like ISALNUM.  */
-
-static inline bool
-cp_ident_is_alnum (unsigned char ch)
-{
-  return ISALNUM (ch) || ch >= 0x80;
-}
-
 /* Find the end of a symbol name starting at LEXPTR.  */
 
 static const char *
@@ -1333,7 +1312,7 @@ symbol_end (const char *lexptr)
 {
   const char *p = lexptr;
 
-  while (*p && (cp_ident_is_alnum (*p) || *p == '_' || *p == '$' || *p == '.'))
+  while (*p && (c_ident_is_alnum (*p) || *p == '_' || *p == '$' || *p == '.'))
     p++;
 
   return p;
@@ -1813,7 +1792,7 @@ yylex (void)
       return ERROR;
     }
 
-  if (!(c == '_' || c == '$' || cp_ident_is_alpha (c)))
+  if (!(c == '_' || c == '$' || c_ident_is_alpha (c)))
     {
       /* We must have come across a bad character (e.g. ';').  */
       yyerror (_("invalid character"));
@@ -1824,7 +1803,7 @@ yylex (void)
   namelen = 0;
   do
     c = tokstart[++namelen];
-  while (cp_ident_is_alnum (c) || c == '_' || c == '$');
+  while (c_ident_is_alnum (c) || c == '_' || c == '$');
 
   lexptr += namelen;
author	Pedro Alves <palves@redhat.com>	2018-05-22 17:35:38 +0100
committer	Pedro Alves <palves@redhat.com>	2018-05-22 17:35:38 +0100
commit	b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51 (patch)
tree	777bdbeaa10580f6d5a404ad2d9b86abf11da683 /gdb/cp-name-parser.y
parent	0ec848ad25bb77edd9c9c3c097c3dd5b8874a6c0 (diff)
download	fsf-binutils-gdb-b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51.zip fsf-binutils-gdb-b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51.tar.gz fsf-binutils-gdb-b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51.tar.bz2