aboutsummaryrefslogtreecommitdiff
path: root/libcpp/lex.cc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2022-08-20 10:26:55 +0200
committer Jakub Jelinek <jakub@redhat.com> 2022-08-20 10:26:55 +0200
commit e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8 (patch)
tree 83db1180a30890d65465b55a54be1e8a30cc1534 /libcpp/lex.cc
parent 613e9e16b85e209fad316deaef33cfaf7bd2bc98 (diff)
download gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.zip
gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.tar.gz
gcc-e9dd050e0ccd644c3bb6d6538dc6187157f6b3e8.tar.bz2
libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
The following patch implements the C++23 P2290R3 paper. 2022-08-20 Jakub Jelinek <jakub@redhat.com> PR c++/106645 libcpp/ * include/cpplib.h (struct cpp_options): Implement P2290R3 - Delimited escape sequences. Add delimited_escape_seqs member. * init.cc (struct lang_flags): Likewise. (lang_defaults): Add delim column. (cpp_set_lang): Copy over delimited_escape_seqs. * charset.cc (extend_char_range): New function. (_cpp_valid_ucn): Use it. Handle delimited escape sequences. (convert_hex): Likewise. (convert_oct): Likewise. (convert_ucn): Use extend_char_range. (convert_escape): Call convert_oct even for \o. (_cpp_interpret_identifier): Handle delimited escape sequences. * lex.cc (get_bidi_ucn_1): Likewise. Add end argument, fill it in. (get_bidi_ucn): Adjust get_bidi_ucn_1 caller. Use end argument to compute num_bytes. gcc/testsuite/ * c-c++-common/cpp/delimited-escape-seq-1.c: New test. * c-c++-common/cpp/delimited-escape-seq-2.c: New test. * c-c++-common/cpp/delimited-escape-seq-3.c: New test. * c-c++-common/Wbidi-chars-24.c: New test. * gcc.dg/cpp/delimited-escape-seq-1.c: New test. * gcc.dg/cpp/delimited-escape-seq-2.c: New test. * g++.dg/cpp/delimited-escape-seq-1.C: New test. * g++.dg/cpp/delimited-escape-seq-2.C: New test.
Diffstat (limited to 'libcpp/lex.cc')
-rw-r--r-- libcpp/lex.cc 25
1 files changed, 21 insertions, 4 deletions
diff --git a/libcpp/lex.cc b/libcpp/lex.cc
index f891d3e..571cd2a 100644
--- a/libcpp/lex.cc
+++ b/libcpp/lex.cc
@@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
/* Parse a UCN where P points just past \u or \U and return its bidi code. */
static bidi::kind
-get_bidi_ucn_1 (const unsigned char *p, bool is_U)
+get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
{
/* 6.4.3 Universal Character Names
\u hex-quad
\U hex-quad hex-quad
+ \u { simple-hexadecimal-digit-sequence }
where \unnnn means \U0000nnnn. */
+ *end = p + 4;
if (is_U)
{
if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
return bidi::kind::NONE;
/* Skip 4B so we can treat \u and \U the same below. */
p += 4;
+ *end += 4;
+ }
+ else if (p[0] == '{')
+ {
+ p++;
+ while (*p == '0')
+ p++;
+ if (p[0] != '2'
+ || p[1] != '0'
+ || !ISXDIGIT (p[2])
+ || !ISXDIGIT (p[3])
+ || p[4] != '}')
+ return bidi::kind::NONE;
+ *end = p + 5;
}
/* All code points we are looking for start with 20xx. */
@@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p, bool is_U)
If the kind is not NONE, write the location to *OUT.*/
static bidi::kind
-get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
+get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
location_t *out)
{
- bidi::kind result = get_bidi_ucn_1 (p, is_U);
+ const unsigned char *end;
+ bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
if (result != bidi::kind::NONE)
{
const unsigned char *start = p - 2;
- size_t num_bytes = 2 + (is_U ? 8 : 4);
+ size_t num_bytes = end - start;
*out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
}
return result;