diff options
author | Martin Liska <mliska@suse.cz> | 2022-09-12 10:43:19 +0200 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2022-09-12 10:43:19 +0200 |
commit | fdb97cd0b7d15efa39ba79dca44be93debb0ef12 (patch) | |
tree | 65a6d95503fb9897bda29c72a629e57bb773d1c1 /libcpp | |
parent | 918bc838c2803f08e4d7ccd179396d48cb8ec804 (diff) | |
parent | 643ae816f17745a77b62188b6bf169211609a59b (diff) | |
download | gcc-fdb97cd0b7d15efa39ba79dca44be93debb0ef12.zip gcc-fdb97cd0b7d15efa39ba79dca44be93debb0ef12.tar.gz gcc-fdb97cd0b7d15efa39ba79dca44be93debb0ef12.tar.bz2 |
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'libcpp')
-rw-r--r-- | libcpp/ChangeLog | 31 | ||||
-rw-r--r-- | libcpp/charset.cc | 86 | ||||
-rw-r--r-- | libcpp/expr.cc | 2 | ||||
-rw-r--r-- | libcpp/include/cpplib.h | 10 | ||||
-rw-r--r-- | libcpp/init.cc | 53 | ||||
-rw-r--r-- | libcpp/line-map.cc | 41 |
6 files changed, 167 insertions, 56 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 7aba4f5..5984915 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,34 @@ +2022-09-08 Lewis Hyatt <lhyatt@gmail.com> + + * line-map.cc (location_adhoc_data_update): Remove reliance on + undefined behavior. + (get_combined_adhoc_loc): Likewise. + (rebuild_location_adhoc_htab): Fix issue where the htab was not + properly updated. + +2022-09-07 Joseph Myers <joseph@codesourcery.com> + + * include/cpplib.h (struct cpp_options): Add true_false. + * expr.cc (eval_token): Check true_false not cplusplus to + determine whether to handle true and false keywords. + * init.cc (struct lang_flags): Add true_false. + (lang_defaults): Update. + (cpp_set_lang): Set true_false. + +2022-09-07 Jakub Jelinek <jakub@redhat.com> + + * include/cpplib.h (struct cpp_options): Add cpp_warn_unicode member. + (enum cpp_warning_reason): Add CPP_W_UNICODE. + * init.cc (cpp_create_reader): Initialize cpp_warn_unicode. + * charset.cc (_cpp_valid_ucn): In possible identifier contexts, don't + handle \u{ or \N{ specially in -std=c* modes except -std=c++2{3,b}. + In possible identifier contexts, don't emit an error and punt + if \N isn't followed by {, or if \N{} surrounds some lower case + letters or _. In possible identifier contexts when not C++23, don't + emit an error but warning about unknown character names and treat as + separate tokens. When treating as separate tokens \u{ or \N{, emit + warnings. + 2022-09-05 Joseph Myers <joseph@codesourcery.com> * init.cc (lang_defaults): Disable trigraphs for C2x. diff --git a/libcpp/charset.cc b/libcpp/charset.cc index c9656db..6834969 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -1448,7 +1448,11 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, if (str[-1] == 'u') { length = 4; - if (str < limit && *str == '{') + if (str < limit + && *str == '{' + && (!identifier_pos + || CPP_OPTION (pfile, delimited_escape_seqs) + || !CPP_OPTION (pfile, std))) { str++; /* Magic value to indicate no digits seen. */ @@ -1462,8 +1466,22 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, else if (str[-1] == 'N') { length = 4; + if (identifier_pos + && !CPP_OPTION (pfile, delimited_escape_seqs) + && CPP_OPTION (pfile, std)) + { + *cp = 0; + return false; + } if (str == limit || *str != '{') - cpp_error (pfile, CPP_DL_ERROR, "'\\N' not followed by '{'"); + { + if (identifier_pos) + { + *cp = 0; + return false; + } + cpp_error (pfile, CPP_DL_ERROR, "'\\N' not followed by '{'"); + } else { str++; @@ -1489,15 +1507,19 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, if (str < limit && *str == '}') { - if (name == str && identifier_pos) + if (identifier_pos && name == str) { + cpp_warning (pfile, CPP_W_UNICODE, + "empty named universal character escape " + "sequence; treating it as separate tokens"); *cp = 0; return false; } if (name == str) cpp_error (pfile, CPP_DL_ERROR, "empty named universal character escape sequence"); - else if (!CPP_OPTION (pfile, delimited_escape_seqs) + else if ((!identifier_pos || strict) + && !CPP_OPTION (pfile, delimited_escape_seqs) && CPP_OPTION (pfile, cpp_pedantic)) cpp_error (pfile, CPP_DL_PEDWARN, "named universal character escapes are only valid " @@ -1515,27 +1537,51 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, uname2c_tree, NULL); if (result == (cppchar_t) -1) { - cpp_error (pfile, CPP_DL_ERROR, - "\\N{%.*s} is not a valid universal " - "character", (int) (str - name), name); + bool ret = true; + if (identifier_pos + && (!CPP_OPTION (pfile, delimited_escape_seqs) + || !strict)) + ret = cpp_warning (pfile, CPP_W_UNICODE, + "\\N{%.*s} is not a valid " + "universal character; treating it " + "as separate tokens", + (int) (str - name), name); + else + cpp_error (pfile, CPP_DL_ERROR, + "\\N{%.*s} is not a valid universal " + "character", (int) (str - name), name); /* Try to do a loose name lookup according to Unicode loose matching rule UAX44-LM2. */ char canon_name[uname2c_max_name_len + 1]; result = _cpp_uname2c_uax44_lm2 ((const char *) name, str - name, canon_name); - if (result != (cppchar_t) -1) + if (result != (cppchar_t) -1 && ret) cpp_error (pfile, CPP_DL_NOTE, "did you mean \\N{%s}?", canon_name); else - result = 0x40; + result = 0xC0; + if (identifier_pos + && (!CPP_OPTION (pfile, delimited_escape_seqs) + || !strict)) + { + *cp = 0; + return false; + } } } str++; extend_char_range (char_range, loc_reader); } else if (identifier_pos) - length = 1; + { + cpp_warning (pfile, CPP_W_UNICODE, + "'\\N{' not terminated with '}' after %.*s; " + "treating it as separate tokens", + (int) (str - base), base); + *cp = 0; + return false; + } else { cpp_error (pfile, CPP_DL_ERROR, @@ -1584,12 +1630,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, } while (--length); - if (delimited - && str < limit - && *str == '}' - && (length != 32 || !identifier_pos)) + if (delimited && str < limit && *str == '}') { - if (length == 32) + if (length == 32 && identifier_pos) + { + cpp_warning (pfile, CPP_W_UNICODE, + "empty delimited escape sequence; " + "treating it as separate tokens"); + *cp = 0; + return false; + } + else if (length == 32) cpp_error (pfile, CPP_DL_ERROR, "empty delimited escape sequence"); else if (!CPP_OPTION (pfile, delimited_escape_seqs) @@ -1607,6 +1658,11 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, error message in that case. */ if (length && identifier_pos) { + if (delimited) + cpp_warning (pfile, CPP_W_UNICODE, + "'\\u{' not terminated with '}' after %.*s; " + "treating it as separate tokens", + (int) (str - base), base); *cp = 0; return false; } diff --git a/libcpp/expr.cc b/libcpp/expr.cc index a022904..4bb02c4 100644 --- a/libcpp/expr.cc +++ b/libcpp/expr.cc @@ -1183,7 +1183,7 @@ eval_token (cpp_reader *pfile, const cpp_token *token, case CPP_NAME: if (token->val.node.node == pfile->spec_nodes.n_defined) return parse_defined (pfile); - else if (CPP_OPTION (pfile, cplusplus) + else if (CPP_OPTION (pfile, true_false) && (token->val.node.node == pfile->spec_nodes.n_true || token->val.node.node == pfile->spec_nodes.n_false)) { diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 1a3fb19..2db1e9c 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -526,6 +526,9 @@ struct cpp_options /* Nonzero for C++23 delimited escape sequences. */ unsigned char delimited_escape_seqs; + /* Nonzero for 'true' and 'false' in #if expressions. */ + unsigned char true_false; + /* Holds the name of the target (execution) character set. */ const char *narrow_charset; @@ -565,6 +568,10 @@ struct cpp_options 2 if it should be a pedwarn. */ unsigned char cpp_warn_invalid_utf8; + /* True if libcpp should warn about invalid forms of delimited or named + escape sequences. */ + bool cpp_warn_unicode; + /* True if -finput-charset= option has been used explicitly. */ bool cpp_input_charset_explicit; @@ -675,7 +682,8 @@ enum cpp_warning_reason { CPP_W_CXX20_COMPAT, CPP_W_EXPANSION_TO_DEFINED, CPP_W_BIDIRECTIONAL, - CPP_W_INVALID_UTF8 + CPP_W_INVALID_UTF8, + CPP_W_UNICODE }; /* Callback for header lookup for HEADER, which is the name of a diff --git a/libcpp/init.cc b/libcpp/init.cc index 3e5601a..d3b4f00 100644 --- a/libcpp/init.cc +++ b/libcpp/init.cc @@ -98,34 +98,35 @@ struct lang_flags char elifdef; char warning_directive; char delimited_escape_seqs; + char true_false; }; static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, - /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, - /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, - /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */ + /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }, + /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }, + /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }, + /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 }, + /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 }, + /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 }, + /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 }, + /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 }, + /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; /* Sets internal flags correctly for a given language. */ @@ -157,6 +158,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang) CPP_OPTION (pfile, elifdef) = l->elifdef; CPP_OPTION (pfile, warning_directive) = l->warning_directive; CPP_OPTION (pfile, delimited_escape_seqs) = l->delimited_escape_seqs; + CPP_OPTION (pfile, true_false) = l->true_false; } /* Initialize library global state. */ @@ -228,6 +230,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, CPP_OPTION (pfile, warn_date_time) = 0; CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; CPP_OPTION (pfile, cpp_warn_invalid_utf8) = 0; + CPP_OPTION (pfile, cpp_warn_unicode) = 1; CPP_OPTION (pfile, cpp_input_charset_explicit) = 0; /* Default CPP arithmetic to something sensible for the host for the diff --git a/libcpp/line-map.cc b/libcpp/line-map.cc index 62077c3..391f1d4 100644 --- a/libcpp/line-map.cc +++ b/libcpp/line-map.cc @@ -85,27 +85,38 @@ location_adhoc_data_eq (const void *l1, const void *l2) && lb1->data == lb2->data); } -/* Update the hashtable when location_adhoc_data is reallocated. */ +/* Update the hashtable when location_adhoc_data_map::data is reallocated. + The param is an array of two pointers, the previous value of the data + pointer, and then the new value. The pointers stored in the hash map + are then rebased to be relative to the new data pointer instead of the + old one. */ static int -location_adhoc_data_update (void **slot, void *data) +location_adhoc_data_update (void **slot_v, void *param_v) { - *((char **) slot) - = (char *) ((uintptr_t) *((char **) slot) + *((ptrdiff_t *) data)); + const auto slot = reinterpret_cast<location_adhoc_data **> (slot_v); + const auto param = static_cast<location_adhoc_data **> (param_v); + *slot = (*slot - param[0]) + param[1]; return 1; } -/* Rebuild the hash table from the location adhoc data. */ +/* The adhoc data hash table is not part of the GGC infrastructure, so it was + not initialized when SET was reconstructed from PCH; take care of that by + rebuilding it from scratch. */ void rebuild_location_adhoc_htab (line_maps *set) { - unsigned i; set->location_adhoc_data_map.htab = htab_create (100, location_adhoc_data_hash, location_adhoc_data_eq, NULL); - for (i = 0; i < set->location_adhoc_data_map.curr_loc; i++) - htab_find_slot (set->location_adhoc_data_map.htab, - set->location_adhoc_data_map.data + i, INSERT); + for (auto p = set->location_adhoc_data_map.data, + end = p + set->location_adhoc_data_map.curr_loc; + p != end; ++p) + { + const auto slot = reinterpret_cast<location_adhoc_data **> + (htab_find_slot (set->location_adhoc_data_map.htab, p, INSERT)); + *slot = p; + } } /* Helper function for get_combined_adhoc_loc. @@ -211,8 +222,7 @@ get_combined_adhoc_loc (line_maps *set, if (set->location_adhoc_data_map.curr_loc >= set->location_adhoc_data_map.allocated) { - char *orig_data = (char *) set->location_adhoc_data_map.data; - ptrdiff_t offset; + const auto orig_data = set->location_adhoc_data_map.data; /* Cast away extern "C" from the type of xrealloc. */ line_map_realloc reallocator = (set->reallocator ? set->reallocator @@ -226,10 +236,13 @@ get_combined_adhoc_loc (line_maps *set, reallocator (set->location_adhoc_data_map.data, set->location_adhoc_data_map.allocated * sizeof (struct location_adhoc_data)); - offset = (char *) (set->location_adhoc_data_map.data) - orig_data; if (set->location_adhoc_data_map.allocated > 128) - htab_traverse (set->location_adhoc_data_map.htab, - location_adhoc_data_update, &offset); + { + location_adhoc_data *param[2] + = {orig_data, set->location_adhoc_data_map.data}; + htab_traverse (set->location_adhoc_data_map.htab, + location_adhoc_data_update, param); + } } *slot = set->location_adhoc_data_map.data + set->location_adhoc_data_map.curr_loc; |