From b1b60145aedb8adcb0b9dcf43a5ae735c2f03b51 Mon Sep 17 00:00:00 2001
From: Pedro Alves
Date: Tue, 22 May 2018 17:35:38 +0100
Subject: Support UTF-8 identifiers in C/C++ expressions (PR gdb/22973)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Factor cp_ident_is_alpha/cp_ident_is_alnum out of
gdb/cp-name-parser.y and use them in the C/C++ expression parser too.

New test included.

gdb/ChangeLog:
2018-05-22  Pedro Alves
	    張俊芝

	PR gdb/22973
	* c-exp.y: Include "c-support.h".
	(parse_number, c_parse_escape, lex_one_token): Use TOLOWER instead
	of tolower.  Use c_ident_is_alpha to scan names.
	* c-lang.c: Include "c-support.h".
	(convert_ucn, convert_octal, convert_hex, convert_escape): Use
	ISXDIGIT instead of isxdigit and ISDIGIT instead of isdigit.
	* c-support.h: New file, with bits factored out from ...
	* cp-name-parser.y: ... this file.
	Include "c-support.h".
	(cp_ident_is_alpha, cp_ident_is_alnum): Deleted, moved to
	c-support.h and renamed.
	(symbol_end, yylex): Adjust.

gdb/testsuite/ChangeLog:
2018-05-22  Pedro Alves

	PR gdb/22973
	* gdb.base/utf8-identifiers.c: New file.
	* gdb.base/utf8-identifiers.exp: New file.
---
 gdb/c-support.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 gdb/c-support.h

(limited to 'gdb/c-support.h')

diff --git a/gdb/c-support.h b/gdb/c-support.h
new file mode 100644
index 0000000..3641d6f
--- /dev/null
+++ b/gdb/c-support.h
@@ -0,0 +1,46 @@
+/* Helper routines for C support in GDB.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see .  */
+
+#ifndef C_SUPPORT_H
+#define C_SUPPORT_H
+
+#include "safe-ctype.h"
+
+/* Like ISALPHA, but also returns true for the union of all UTF-8
+   multi-byte sequence bytes and non-ASCII characters in
+   extended-ASCII charsets (e.g., Latin1).  I.e., returns true if
+   ISALPHA (CH) holds or the high bit of CH is set (CH >= 0x80).  Not
+   all UTF-8 ranges are allowed in C++ identifiers, but we don't need
+   to be pedantic, so for simplicity we ignore that here.  This also
+   avoids the complication of knowing what the right encoding is.  */
+
+static inline bool
+c_ident_is_alpha (unsigned char ch)
+{
+  return ISALPHA (ch) || ch >= 0x80;
+}
+
+/* Similarly, but like ISALNUM: true if ISALNUM (CH) or CH >= 0x80.  */
+
+static inline bool
+c_ident_is_alnum (unsigned char ch)
+{
+  return ISALNUM (ch) || ch >= 0x80;
+}
+
+#endif /* C_SUPPORT_H */
-- 
cgit v1.1