From 572f5e1bc68e131b25cd2d5ba231e932f5038904 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 7 Sep 2022 08:44:38 +0200 Subject: libcpp: Named universal character escapes and delimited escape sequence tweaks On Tue, Aug 30, 2022 at 09:10:37PM +0000, Joseph Myers wrote: > I'm seeing build failures of glibc for powerpc64, as illustrated by the > following C code: > > #if 0 > \NARG > #endif > > (the actual sysdeps/powerpc/powerpc64/sysdep.h code is inside #ifdef > __ASSEMBLER__). > > This shows some problems with this feature - and with delimited escape > sequences - as it affects C. It's fine to accept it as an extension > inside string and character literals, because \N or \u{...} would be > invalid in the absence of the feature (i.e. the syntax for such literals > fails to match, meaning that the rule about undefined behavior for a > single ' or " as a pp-token applies). But outside string and character > literals, the usual lexing rules apply, the \ is a pp-token on its own and > the code is valid at the preprocessing level, and with expansion of macros > appearing before or after the \ (e.g. u defined as a macro in the \u{...} > case) it may be valid code at the language level as well. I don't know > what older C++ versions say about this, but for C this means e.g. > > #define z(x) 0 > #define a z( > int x = a\NARG); > > needs to be accepted as expanding to "int x = 0;", not interpreted as > using the \N feature in an identifier and produce an error. The following patch changes this, so that: 1) outside of string/character literals, \N without following { is never treated as an error nor warning, it is silently treated as \ separate token followed by whatever is after it 2) \u{123} and \N{LATIN SMALL LETTER A WITH ACUTE} are not handled as extension at all outside of string/character literals in the strict standard modes (-std=c*) except for -std=c++{23,2b}, only in the -std=gnu* modes, because it changes behavior on valid sources, e.g. 
#define z(x) 0 #define a z( int x = a\u{123}); int y = a\N{LATIN SMALL LETTER A WITH ACUTE}); 3) introduces -Wunicode warning (on by default) and warns for cases of what looks like invalid delimited escape sequence or named universal character escape outside of string/character literals and is treated as separate tokens 2022-09-07 Jakub Jelinek libcpp/ * include/cpplib.h (struct cpp_options): Add cpp_warn_unicode member. (enum cpp_warning_reason): Add CPP_W_UNICODE. * init.cc (cpp_create_reader): Initialize cpp_warn_unicode. * charset.cc (_cpp_valid_ucn): In possible identifier contexts, don't handle \u{ or \N{ specially in -std=c* modes except -std=c++2{3,b}. In possible identifier contexts, don't emit an error and punt if \N isn't followed by {, or if \N{} surrounds some lower case letters or _. In possible identifier contexts when not C++23, don't emit an error but warning about unknown character names and treat as separate tokens. When treating as separate tokens \u{ or \N{, emit warnings. gcc/ * doc/invoke.texi (-Wno-unicode): Document. gcc/c-family/ * c.opt (Winvalid-utf8): Use ObjC instead of objC. Remove " in comments" from description. (Wunicode): New option. gcc/testsuite/ * c-c++-common/cpp/delimited-escape-seq-4.c: New test. * c-c++-common/cpp/delimited-escape-seq-5.c: New test. * c-c++-common/cpp/delimited-escape-seq-6.c: New test. * c-c++-common/cpp/delimited-escape-seq-7.c: New test. * c-c++-common/cpp/named-universal-char-escape-5.c: New test. * c-c++-common/cpp/named-universal-char-escape-6.c: New test. * c-c++-common/cpp/named-universal-char-escape-7.c: New test. * g++.dg/cpp23/named-universal-char-escape1.C: New test. * g++.dg/cpp23/named-universal-char-escape2.C: New test. 
--- libcpp/charset.cc | 86 ++++++++++++++++++++++++++++++++++++++++--------- libcpp/include/cpplib.h | 7 +++- libcpp/init.cc | 1 + 3 files changed, 78 insertions(+), 16 deletions(-) (limited to 'libcpp') diff --git a/libcpp/charset.cc b/libcpp/charset.cc index c9656db..6834969 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -1448,7 +1448,11 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, if (str[-1] == 'u') { length = 4; - if (str < limit && *str == '{') + if (str < limit + && *str == '{' + && (!identifier_pos + || CPP_OPTION (pfile, delimited_escape_seqs) + || !CPP_OPTION (pfile, std))) { str++; /* Magic value to indicate no digits seen. */ @@ -1462,8 +1466,22 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, else if (str[-1] == 'N') { length = 4; + if (identifier_pos + && !CPP_OPTION (pfile, delimited_escape_seqs) + && CPP_OPTION (pfile, std)) + { + *cp = 0; + return false; + } if (str == limit || *str != '{') - cpp_error (pfile, CPP_DL_ERROR, "'\\N' not followed by '{'"); + { + if (identifier_pos) + { + *cp = 0; + return false; + } + cpp_error (pfile, CPP_DL_ERROR, "'\\N' not followed by '{'"); + } else { str++; @@ -1489,15 +1507,19 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, if (str < limit && *str == '}') { - if (name == str && identifier_pos) + if (identifier_pos && name == str) { + cpp_warning (pfile, CPP_W_UNICODE, + "empty named universal character escape " + "sequence; treating it as separate tokens"); *cp = 0; return false; } if (name == str) cpp_error (pfile, CPP_DL_ERROR, "empty named universal character escape sequence"); - else if (!CPP_OPTION (pfile, delimited_escape_seqs) + else if ((!identifier_pos || strict) + && !CPP_OPTION (pfile, delimited_escape_seqs) && CPP_OPTION (pfile, cpp_pedantic)) cpp_error (pfile, CPP_DL_PEDWARN, "named universal character escapes are only valid " @@ -1515,27 +1537,51 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, uname2c_tree, NULL); if (result == (cppchar_t) -1) { 
- cpp_error (pfile, CPP_DL_ERROR, - "\\N{%.*s} is not a valid universal " - "character", (int) (str - name), name); + bool ret = true; + if (identifier_pos + && (!CPP_OPTION (pfile, delimited_escape_seqs) + || !strict)) + ret = cpp_warning (pfile, CPP_W_UNICODE, + "\\N{%.*s} is not a valid " + "universal character; treating it " + "as separate tokens", + (int) (str - name), name); + else + cpp_error (pfile, CPP_DL_ERROR, + "\\N{%.*s} is not a valid universal " + "character", (int) (str - name), name); /* Try to do a loose name lookup according to Unicode loose matching rule UAX44-LM2. */ char canon_name[uname2c_max_name_len + 1]; result = _cpp_uname2c_uax44_lm2 ((const char *) name, str - name, canon_name); - if (result != (cppchar_t) -1) + if (result != (cppchar_t) -1 && ret) cpp_error (pfile, CPP_DL_NOTE, "did you mean \\N{%s}?", canon_name); else - result = 0x40; + result = 0xC0; + if (identifier_pos + && (!CPP_OPTION (pfile, delimited_escape_seqs) + || !strict)) + { + *cp = 0; + return false; + } } } str++; extend_char_range (char_range, loc_reader); } else if (identifier_pos) - length = 1; + { + cpp_warning (pfile, CPP_W_UNICODE, + "'\\N{' not terminated with '}' after %.*s; " + "treating it as separate tokens", + (int) (str - base), base); + *cp = 0; + return false; + } else { cpp_error (pfile, CPP_DL_ERROR, @@ -1584,12 +1630,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, } while (--length); - if (delimited - && str < limit - && *str == '}' - && (length != 32 || !identifier_pos)) + if (delimited && str < limit && *str == '}') { - if (length == 32) + if (length == 32 && identifier_pos) + { + cpp_warning (pfile, CPP_W_UNICODE, + "empty delimited escape sequence; " + "treating it as separate tokens"); + *cp = 0; + return false; + } + else if (length == 32) cpp_error (pfile, CPP_DL_ERROR, "empty delimited escape sequence"); else if (!CPP_OPTION (pfile, delimited_escape_seqs) @@ -1607,6 +1658,11 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar 
**pstr, error message in that case. */ if (length && identifier_pos) { + if (delimited) + cpp_warning (pfile, CPP_W_UNICODE, + "'\\u{' not terminated with '}' after %.*s; " + "treating it as separate tokens", + (int) (str - base), base); *cp = 0; return false; } diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 1a3fb19..c25bcf2 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -565,6 +565,10 @@ struct cpp_options 2 if it should be a pedwarn. */ unsigned char cpp_warn_invalid_utf8; + /* True if libcpp should warn about invalid forms of delimited or named + escape sequences. */ + bool cpp_warn_unicode; + /* True if -finput-charset= option has been used explicitly. */ bool cpp_input_charset_explicit; @@ -675,7 +679,8 @@ enum cpp_warning_reason { CPP_W_CXX20_COMPAT, CPP_W_EXPANSION_TO_DEFINED, CPP_W_BIDIRECTIONAL, - CPP_W_INVALID_UTF8 + CPP_W_INVALID_UTF8, + CPP_W_UNICODE }; /* Callback for header lookup for HEADER, which is the name of a diff --git a/libcpp/init.cc b/libcpp/init.cc index 3e5601a..6292524 100644 --- a/libcpp/init.cc +++ b/libcpp/init.cc @@ -228,6 +228,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, CPP_OPTION (pfile, warn_date_time) = 0; CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; CPP_OPTION (pfile, cpp_warn_invalid_utf8) = 0; + CPP_OPTION (pfile, cpp_warn_unicode) = 1; CPP_OPTION (pfile, cpp_input_charset_explicit) = 0; /* Default CPP arithmetic to something sensible for the host for the -- cgit v1.1 From 0a91bdaf177409a2a5e7895bce4f0e7091b4b3ca Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Wed, 7 Sep 2022 13:56:25 +0000 Subject: c: New C2x keywords C2x follows C++ in making alignas, alignof, bool, false, static_assert, thread_local and true keywords; implement this accordingly. 
This implementation makes them normal keywords in C2x mode just like any other keyword (C2x leaves open the possibility of implementation using predefined macros instead - thus, there aren't any testcases asserting that they aren't macros). As in C++ and previous versions of C, true and false are handled like signed 1 and 0 in #if (there was an intermediate state in some C2x drafts where they had different macro expansions that were unsigned in #if). Bootstrapped with no regressions for x86_64-pc-linux-gnu. As with the removal of unprototyped functions, this change has a high risk of breaking some old code and people doing GNU/Linux distribution builds may wish to see how much is broken in a build with a -std=gnu2x default. gcc/ * ginclude/stdalign.h [defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L]: Disable all content. * ginclude/stdbool.h [defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L] (bool, true, false): Do not define. gcc/c-family/ * c-common.cc (c_common_reswords): Use D_C2X instead of D_CXXONLY for alignas, alignof, bool, false, static_assert, thread_local and true. gcc/c/ * c-parser.cc (c_parser_static_assert_declaration_no_semi) (c_parser_alignas_specifier, c_parser_alignof_expression): Allow for C2x spellings of keywords. (c_parser_postfix_expression): Handle RID_TRUE and RID_FALSE. gcc/testsuite/ * gcc.dg/c11-keywords-1.c, gcc.dg/c2x-align-1.c, gcc.dg/c2x-align-6.c, gcc.dg/c2x-bool-2.c, gcc.dg/c2x-static-assert-3.c, gcc.dg/c2x-static-assert-4.c, gcc.dg/c2x-thread-local-1.c: New tests. * gcc.dg/c2x-bool-1.c: Update expectations. libcpp/ * include/cpplib.h (struct cpp_options): Add true_false. * expr.cc (eval_token): Check true_false not cplusplus to determine whether to handle true and false keywords. * init.cc (struct lang_flags): Add true_false. (lang_defaults): Update. (cpp_set_lang): Set true_false. 
--- libcpp/expr.cc | 2 +- libcpp/include/cpplib.h | 3 +++ libcpp/init.cc | 52 +++++++++++++++++++++++++------------------------ 3 files changed, 31 insertions(+), 26 deletions(-) (limited to 'libcpp') diff --git a/libcpp/expr.cc b/libcpp/expr.cc index a022904..4bb02c4 100644 --- a/libcpp/expr.cc +++ b/libcpp/expr.cc @@ -1183,7 +1183,7 @@ eval_token (cpp_reader *pfile, const cpp_token *token, case CPP_NAME: if (token->val.node.node == pfile->spec_nodes.n_defined) return parse_defined (pfile); - else if (CPP_OPTION (pfile, cplusplus) + else if (CPP_OPTION (pfile, true_false) && (token->val.node.node == pfile->spec_nodes.n_true || token->val.node.node == pfile->spec_nodes.n_false)) { diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index c25bcf2..2db1e9c 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -526,6 +526,9 @@ struct cpp_options /* Nonzero for C++23 delimited escape sequences. */ unsigned char delimited_escape_seqs; + /* Nonzero for 'true' and 'false' in #if expressions. */ + unsigned char true_false; + /* Holds the name of the target (execution) character set. 
*/ const char *narrow_charset; diff --git a/libcpp/init.cc b/libcpp/init.cc index 6292524..d3b4f00 100644 --- a/libcpp/init.cc +++ b/libcpp/init.cc @@ -98,34 +98,35 @@ struct lang_flags char elifdef; char warning_directive; char delimited_escape_seqs; + char true_false; }; static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 
0, 0, 0 }, - /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, - /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, - /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */ + /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }, + /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }, + /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }, + /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }, + /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 }, + /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 }, + /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 }, + 
/* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 }, + /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 }, + /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; /* Sets internal flags correctly for a given language. */ @@ -157,6 +158,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang) CPP_OPTION (pfile, elifdef) = l->elifdef; CPP_OPTION (pfile, warning_directive) = l->warning_directive; CPP_OPTION (pfile, delimited_escape_seqs) = l->delimited_escape_seqs; + CPP_OPTION (pfile, true_false) = l->true_false; } /* Initialize library global state. */ -- cgit v1.1 From fe2a8ce93c86e05730ee9b975f413cb3fc288d94 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 8 Sep 2022 00:18:33 +0000 Subject: Daily bump. --- libcpp/ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'libcpp') diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 7aba4f5..625b4e5 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,26 @@ +2022-09-07 Joseph Myers + + * include/cpplib.h (struct cpp_options): Add true_false. + * expr.cc (eval_token): Check true_false not cplusplus to + determine whether to handle true and false keywords. + * init.cc (struct lang_flags): Add true_false. + (lang_defaults): Update. + (cpp_set_lang): Set true_false. + +2022-09-07 Jakub Jelinek + + * include/cpplib.h (struct cpp_options): Add cpp_warn_unicode member. + (enum cpp_warning_reason): Add CPP_W_UNICODE. + * init.cc (cpp_create_reader): Initialize cpp_warn_unicode. 
+ * charset.cc (_cpp_valid_ucn): In possible identifier contexts, don't + handle \u{ or \N{ specially in -std=c* modes except -std=c++2{3,b}. + In possible identifier contexts, don't emit an error and punt + if \N isn't followed by {, or if \N{} surrounds some lower case + letters or _. In possible identifier contexts when not C++23, don't + emit an error but warning about unknown character names and treat as + separate tokens. When treating as separate tokens \u{ or \N{, emit + warnings. + 2022-09-05 Joseph Myers * init.cc (lang_defaults): Disable trigraphs for C2x. -- cgit v1.1 From 95c7d5899521a9e266c68cbcc92edfd2cde8694e Mon Sep 17 00:00:00 2001 From: Lewis Hyatt Date: Wed, 7 Sep 2022 09:33:26 -0400 Subject: pch: Fix the reconstruction of adhoc data hash table The function rebuild_location_adhoc_htab() was meant to reconstruct the adhoc location hash map after restoring a line_maps instance from a PCH. However, the function has never performed as intended because it missed the last step of adding the data into the newly reconstructed hash map. This patch fixes that. It does not seem possible to construct a test case such that the current incorrect behavior is observable as a compiler issue. It would be observable, if it were possible for a precompiled header to contain an adhoc location with a non-zero custom data pointer. But currently, such data pointers are used only by the middle end to track inlining information, and this happens later, too late to show up in a PCH. I also noted that location_adhoc_data_update, which updates the hash map pointers in a different scenario, was relying on undefined pointer arithmetic behavior. I'm not aware of this having caused any issue in practice, but in this patch I have also changed it to use defined pointer operations instead. libcpp/ChangeLog: * line-map.cc (location_adhoc_data_update): Remove reliance on undefined behavior. (get_combined_adhoc_loc): Likewise. 
(rebuild_location_adhoc_htab): Fix issue where the htab was not properly updated. --- libcpp/line-map.cc | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'libcpp') diff --git a/libcpp/line-map.cc b/libcpp/line-map.cc index 62077c3..391f1d4 100644 --- a/libcpp/line-map.cc +++ b/libcpp/line-map.cc @@ -85,27 +85,38 @@ location_adhoc_data_eq (const void *l1, const void *l2) && lb1->data == lb2->data); } -/* Update the hashtable when location_adhoc_data is reallocated. */ +/* Update the hashtable when location_adhoc_data_map::data is reallocated. + The param is an array of two pointers, the previous value of the data + pointer, and then the new value. The pointers stored in the hash map + are then rebased to be relative to the new data pointer instead of the + old one. */ static int -location_adhoc_data_update (void **slot, void *data) +location_adhoc_data_update (void **slot_v, void *param_v) { - *((char **) slot) - = (char *) ((uintptr_t) *((char **) slot) + *((ptrdiff_t *) data)); + const auto slot = reinterpret_cast<location_adhoc_data **> (slot_v); + const auto param = static_cast<location_adhoc_data **> (param_v); + *slot = (*slot - param[0]) + param[1]; return 1; } -/* Rebuild the hash table from the location adhoc data. */ +/* The adhoc data hash table is not part of the GGC infrastructure, so it was + not initialized when SET was reconstructed from PCH; take care of that by + rebuilding it from scratch. 
*/ void rebuild_location_adhoc_htab (line_maps *set) { - unsigned i; set->location_adhoc_data_map.htab = htab_create (100, location_adhoc_data_hash, location_adhoc_data_eq, NULL); - for (i = 0; i < set->location_adhoc_data_map.curr_loc; i++) - htab_find_slot (set->location_adhoc_data_map.htab, - set->location_adhoc_data_map.data + i, INSERT); + for (auto p = set->location_adhoc_data_map.data, + end = p + set->location_adhoc_data_map.curr_loc; + p != end; ++p) + { + const auto slot = reinterpret_cast<location_adhoc_data **> + (htab_find_slot (set->location_adhoc_data_map.htab, p, INSERT)); + *slot = p; + } } /* Helper function for get_combined_adhoc_loc. @@ -211,8 +222,7 @@ get_combined_adhoc_loc (line_maps *set, if (set->location_adhoc_data_map.curr_loc >= set->location_adhoc_data_map.allocated) { - char *orig_data = (char *) set->location_adhoc_data_map.data; - ptrdiff_t offset; + const auto orig_data = set->location_adhoc_data_map.data; /* Cast away extern "C" from the type of xrealloc. */ line_map_realloc reallocator = (set->reallocator ? set->reallocator @@ -226,10 +236,13 @@ get_combined_adhoc_loc (line_maps *set, reallocator (set->location_adhoc_data_map.data, set->location_adhoc_data_map.allocated * sizeof (struct location_adhoc_data)); - offset = (char *) (set->location_adhoc_data_map.data) - orig_data; if (set->location_adhoc_data_map.allocated > 128) - htab_traverse (set->location_adhoc_data_map.htab, - location_adhoc_data_update, &offset); + { + location_adhoc_data *param[2] + = {orig_data, set->location_adhoc_data_map.data}; + htab_traverse (set->location_adhoc_data_map.htab, + location_adhoc_data_update, param); + } } *slot = set->location_adhoc_data_map.data + set->location_adhoc_data_map.curr_loc; -- cgit v1.1 From 2e7ad70c4abacbd2614358cf057397620d641b0a Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Fri, 9 Sep 2022 00:18:05 +0000 Subject: Daily bump. 
--- libcpp/ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'libcpp') diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 625b4e5..5984915 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,11 @@ +2022-09-08 Lewis Hyatt + + * line-map.cc (location_adhoc_data_update): Remove reliance on + undefined behavior. + (get_combined_adhoc_loc): Likewise. + (rebuild_location_adhoc_htab): Fix issue where the htab was not + properly updated. + 2022-09-07 Joseph Myers * include/cpplib.h (struct cpp_options): Add true_false. -- cgit v1.1