diff options
author | Ian Lance Taylor <iant@golang.org> | 2022-02-11 15:02:44 -0800 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2022-02-11 15:02:44 -0800 |
commit | 9a510fb0970d3d9a4201bce8965cabe67850386b (patch) | |
tree | 43d7fd2bbfd7ad8c9625a718a5e8718889351994 /libcpp | |
parent | a6d3012b274f38b20e2a57162106f625746af6c6 (diff) | |
parent | 8dc2499aa62f768c6395c9754b8cabc1ce25c494 (diff) | |
download | gcc-9a510fb0970d3d9a4201bce8965cabe67850386b.zip gcc-9a510fb0970d3d9a4201bce8965cabe67850386b.tar.gz gcc-9a510fb0970d3d9a4201bce8965cabe67850386b.tar.bz2 |
Merge from trunk revision 8dc2499aa62f768c6395c9754b8cabc1ce25c494
Diffstat (limited to 'libcpp')
-rw-r--r-- | libcpp/ChangeLog | 327 | ||||
-rw-r--r-- | libcpp/Makefile.in | 16 | ||||
-rw-r--r-- | libcpp/charset.cc (renamed from libcpp/charset.c) | 76 | ||||
-rw-r--r-- | libcpp/config.in | 6 | ||||
-rwxr-xr-x | libcpp/configure | 73 | ||||
-rw-r--r-- | libcpp/configure.ac | 8 | ||||
-rw-r--r-- | libcpp/directives.cc (renamed from libcpp/directives.c) | 13 | ||||
-rw-r--r-- | libcpp/errors.cc (renamed from libcpp/errors.c) | 84 | ||||
-rw-r--r-- | libcpp/expr.cc (renamed from libcpp/expr.c) | 2 | ||||
-rw-r--r-- | libcpp/files.cc (renamed from libcpp/files.c) | 15 | ||||
-rw-r--r-- | libcpp/identifiers.cc (renamed from libcpp/identifiers.c) | 2 | ||||
-rw-r--r-- | libcpp/include/cpplib.h | 134 | ||||
-rw-r--r-- | libcpp/include/line-map.h | 37 | ||||
-rw-r--r-- | libcpp/include/mkdeps.h | 4 | ||||
-rw-r--r-- | libcpp/include/symtab.h | 2 | ||||
-rw-r--r-- | libcpp/init.cc (renamed from libcpp/init.c) | 61 | ||||
-rw-r--r-- | libcpp/internal.h | 66 | ||||
-rw-r--r-- | libcpp/lex.cc (renamed from libcpp/lex.c) | 632 | ||||
-rw-r--r-- | libcpp/line-map.cc (renamed from libcpp/line-map.c) | 11 | ||||
-rw-r--r-- | libcpp/macro.cc (renamed from libcpp/macro.c) | 100 | ||||
-rw-r--r-- | libcpp/makeucnid.cc (renamed from libcpp/makeucnid.c) | 4 | ||||
-rw-r--r-- | libcpp/mkdeps.cc (renamed from libcpp/mkdeps.c) | 2 | ||||
-rw-r--r-- | libcpp/pch.cc (renamed from libcpp/pch.c) | 2 | ||||
-rw-r--r-- | libcpp/po/ChangeLog | 4 | ||||
-rw-r--r-- | libcpp/po/es.po | 45 | ||||
-rw-r--r-- | libcpp/symtab.cc (renamed from libcpp/symtab.c) | 2 | ||||
-rw-r--r-- | libcpp/system.h | 14 | ||||
-rw-r--r-- | libcpp/traditional.cc (renamed from libcpp/traditional.c) | 4 | ||||
-rw-r--r-- | libcpp/ucnid.h | 2 | ||||
-rw-r--r-- | libcpp/ucnid.tab | 2 |
30 files changed, 1498 insertions, 252 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 32c91eb..c3679fe 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,328 @@ +2022-02-01 Jakub Jelinek <jakub@redhat.com> + + PR preprocessor/104147 + * macro.cc (funlike_invocation_p): For padding prefer a token + with val.source non-NULL especially if it has PREV_WHITE set + on val.source->flags. Add gcc_assert that CPP_PADDING tokens + don't have PREV_WHITE set in flags. + +2022-02-01 Jakub Jelinek <jakub@redhat.com> + + * directives.cc (destringize_and_run): Push &pfile->avoid_paste + instead of a copy of pfile->directive_result for the CPP_PADDING + case. + +2022-01-24 Marek Polacek <polacek@redhat.com> + + PR preprocessor/104030 + * include/cpplib.h (enum cpp_bidirectional_level): Add + bidirectional_ucn. Set values explicitly. + * internal.h (cpp_reader): Adjust warn_bidi_p. + * lex.cc (maybe_warn_bidi_on_close): Don't warn about UCNs + unless UCN checking is on. + (maybe_warn_bidi_on_char): Likewise. + +2022-01-18 Richard Biener <rguenther@suse.de> + + * include/line-map.h (class line_maps): Re-arrange fields + to minimize padding. + (class rich_location): Likewise. + * line-map.cc (rich_location::rich_location): Adjust. + +2022-01-17 Martin Liska <mliska@suse.cz> + + * Makefile.in: Rename .c names to .cc. + +2022-01-17 Martin Liska <mliska@suse.cz> + + * Makefile.in: Rename .c names to .cc. + * charset.cc (convert_escape): Likewise. + * directives.cc (directive_diagnostics): Likewise. + (_cpp_handle_directive): Likewise. + (lex_macro_node): Likewise. + * include/cpplib.h (struct _cpp_file): Likewise. + (PURE_ZERO): Likewise. + (cpp_defined): Likewise. + (cpp_error_at): Likewise. + (cpp_forall_identifiers): Likewise. + (cpp_compare_macros): Likewise. + (cpp_get_converted_source): Likewise. + (cpp_read_state): Likewise. + (cpp_directive_only_process): Likewise. + (struct cpp_decoded_char): Likewise. + * include/line-map.h (enum lc_reason): Likewise. + (enum location_aspect): Likewise. + * include/mkdeps.h: Likewise. + * init.cc (cpp_destroy): Likewise. + (cpp_finish): Likewise. + * internal.h (struct cpp_reader): Likewise. + (_cpp_defined_macro_p): Likewise. + (_cpp_backup_tokens_direct): Likewise. + (_cpp_destroy_hashtable): Likewise. + (_cpp_has_header): Likewise. + (_cpp_expand_op_stack): Likewise. + (_cpp_commit_buff): Likewise. + (_cpp_restore_special_builtin): Likewise. + (_cpp_bracket_include): Likewise. + (_cpp_replacement_text_len): Likewise. + (ufputs): Likewise. + * line-map.cc (linemap_macro_loc_to_exp_point): Likewise. + (linemap_check_files_exited): Likewise. + (line_map_new_raw): Likewise. + * traditional.cc (enum ls): Likewise. + +2022-01-17 Martin Liska <mliska@suse.cz> + + * charset.c: Moved to... + * charset.cc: ...here. + * directives.c: Moved to... + * directives.cc: ...here. + * errors.c: Moved to... + * errors.cc: ...here. + * expr.c: Moved to... + * expr.cc: ...here. + * files.c: Moved to... + * files.cc: ...here. + * identifiers.c: Moved to... + * identifiers.cc: ...here. + * init.c: Moved to... + * init.cc: ...here. + * lex.c: Moved to... + * lex.cc: ...here. + * line-map.c: Moved to... + * line-map.cc: ...here. + * macro.c: Moved to... + * macro.cc: ...here. + * makeucnid.c: Moved to... + * makeucnid.cc: ...here. + * mkdeps.c: Moved to... + * mkdeps.cc: ...here. + * pch.c: Moved to... + * pch.cc: ...here. + * symtab.c: Moved to... + * symtab.cc: ...here. + * traditional.c: Moved to... + * traditional.cc: ...here. + +2022-01-15 Martin Sebor <msebor@redhat.com> + + * files.c (_cpp_find_file): Substitute a valid pointer for + an invalid one to avoid -Wuse-after-free. + +2022-01-12 Clément Chigot <clement.chigot@atos.net> + + * configure.ac: Check sizeof ino_t and dev_t. + * config.in: Regenerate. + * configure: Regenerate. + * include/cpplib.h (INO_T_CPP): Change for AIX. + (DEV_T_CPP): New macro. + (struct cpp_dir): Use it. + +2021-12-30 Jakub Jelinek <jakub@redhat.com> + + PR preprocessor/89971 + * macro.c (replace_args): For ##__VA_OPT__, if __VA_OPT__ expands + to no tokens at all, drop PASTE_LEFT flag from the previous token. + +2021-12-04 Jakub Jelinek <jakub@redhat.com> + + PR preprocessor/102432 + * lex.c (_cpp_lex_direct): If buffer->need_line while + pfile->state.in_deferred_pragma, return CPP_PRAGMA_EOL token instead + of assertion failure. + +2021-12-03 Jakub Jelinek <jakub@redhat.com> + + PR pch/71934 + * include/line-map.h (class line_maps): Add GTY((callback)) to + reallocator and round_alloc_size members. + +2021-12-01 Jakub Jelinek <jakub@redhat.com> + + PR c++/100977 + * init.c (struct lang_flags): Remove cxx23_identifiers. + (lang_defaults): Remove cxx23_identifiers initializers. + (cpp_set_lang): Don't copy cxx23_identifiers. + * include/cpplib.h (struct cpp_options): Adjust comment about + c11_identifiers. Remove cxx23_identifiers field. + * lex.c (warn_about_normalization): Use cplusplus instead of + cxx23_identifiers. + * charset.c (ucn_valid_in_identifier): Likewise. + +2021-12-01 Jakub Jelinek <jakub@redhat.com> + + PR preprocessor/103415 + * macro.c (stringify_arg): Remove va_opt argument and va_opt handling. + (paste_tokens): On successful paste or in PREV_WHITE and + PREV_FALLTHROUGH flags from the *plhs token to the new token. + (replace_args): Adjust stringify_arg callers. For #__VA_OPT__, + perform token pasting in a separate loop before stringify_arg call. + +2021-11-30 Richard Biener <rguenther@suse.de> + + * charset.c (convert_escape): Remove unreachable break. + +2021-11-30 Jakub Jelinek <jakub@redhat.com> + + PR c++/100977 + * init.c (lang_defaults): Enable cxx23_identifiers for + -std={gnu,c}++{11,14,17,20} too. + +2021-11-29 Eric Gallager <egallager@gcc.gnu.org> + + PR other/103021 + * Makefile.in: Use ETAGS variable in TAGS target. + * configure: Regenerate. + * configure.ac: Allow ETAGS variable to be overridden. + +2021-11-23 Christophe Lyon <christophe.lyon@foss.st.com> + + PR preprocessor/103355 + * system.h (ATTR_LIKELY): Fix definition. + +2021-11-23 Marek Polacek <polacek@redhat.com> + Jonathan Wakely <jwakely@redhat.com> + + PR preprocessor/103355 + * lex.c: Use ATTR_LIKELY instead of [[likely]]. + * system.h (ATTR_LIKELY): Define. + +2021-11-22 Jakub Jelinek <jakub@redhat.com> + Tobias Burnus <tobias@codesourcery.com> + + PR preprocessor/103165 + * internal.h (struct lexer_state): Add ignore__Pragma field. + * macro.c (builtin_macro): Don't interpret _Pragma if + pfile->state.ignore__Pragma. + (expand_arg): Temporarily set pfile->state.ignore__Pragma to 1. + +2021-11-17 David Malcolm <dmalcolm@redhat.com> + + PR preprocessor/103026 + * lex.c (struct bidi::context): New. + (bidi::vec): Convert to a vec of context rather than unsigned + char. + (bidi::ctx_at): Rename to... + (bidi::pop_kind_at): ...this and reimplement for above change. + (bidi::current_ctx): Update for change to vec. + (bidi::current_ctx_ucn_p): Likewise. + (bidi::current_ctx_loc): New. + (bidi::on_char): Update for usage of context struct. Add "loc" + param and pass it when pushing contexts. + (get_location_for_byte_range_in_cur_line): New. + (get_bidi_utf8): Rename to... + (get_bidi_utf8_1): ...this, reintroducing... + (get_bidi_utf8): ...as a wrapper, setting *OUT when the result is + not NONE. + (get_bidi_ucn): Rename to... + (get_bidi_ucn_1): ...this, reintroducing... + (get_bidi_ucn): ...as a wrapper, setting *OUT when the result is + not NONE. + (class unpaired_bidi_rich_location): New. + (maybe_warn_bidi_on_close): Use unpaired_bidi_rich_location when + reporting on unpaired bidi chars. Split into singular vs plural + spellings. + (maybe_warn_bidi_on_char): Pass in a location_t rather than a + const uchar * and use it when emitting warnings, and when calling + bidi::on_char. + (_cpp_skip_block_comment): Capture location when kind is not NONE + and pass it to maybe_warn_bidi_on_char. + (skip_line_comment): Likewise. + (forms_identifier_p): Likewise. + (lex_raw_string): Likewise. + (lex_string): Likewise. + +2021-11-17 David Malcolm <dmalcolm@redhat.com> + + PR preprocessor/103026 + * lex.c (maybe_warn_bidi_on_close): Use a rich_location + and call set_escape_on_output (true) on it. + (maybe_warn_bidi_on_char): Likewise. + +2021-11-17 Jakub Jelinek <jakub@redhat.com> + + PR preprocessor/103130 + * lex.c (cpp_directive_only_process): Treat even \*/ as end of block + comment. + +2021-11-17 Marek Polacek <polacek@redhat.com> + + PR preprocessor/103026 + * include/cpplib.h (enum cpp_bidirectional_level): New. + (struct cpp_options): Add cpp_warn_bidirectional. + (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL. + * internal.h (struct cpp_reader): Add warn_bidi_p member + function. + * init.c (cpp_create_reader): Set cpp_warn_bidirectional. + * lex.c (bidi): New namespace. + (get_bidi_utf8): New function. + (get_bidi_ucn): Likewise. + (maybe_warn_bidi_on_close): Likewise. + (maybe_warn_bidi_on_char): Likewise. + (_cpp_skip_block_comment): Implement warning about bidirectional + control characters. + (skip_line_comment): Likewise. + (forms_identifier_p): Likewise. + (lex_identifier): Likewise. + (lex_string): Likewise. + (lex_raw_string): Likewise. + +2021-11-01 David Malcolm <dmalcolm@redhat.com> + + * charset.c (convert_escape): Use encoding_rich_location when + complaining about nonprintable unknown escape sequences. + (cpp_display_width_computation::::cpp_display_width_computation): + Pass in policy rather than tabstop. + (cpp_display_width_computation::process_next_codepoint): Add "out" + param and populate *out if non-NULL. + (cpp_display_width_computation::advance_display_cols): Pass NULL + to process_next_codepoint. + (cpp_byte_column_to_display_column): Pass in policy rather than + tabstop. Pass NULL to process_next_codepoint. + (cpp_display_column_to_byte_column): Pass in policy rather than + tabstop. + * errors.c (cpp_diagnostic_get_current_location): New function, + splitting out the logic from... + (cpp_diagnostic): ...here. + (cpp_warning_at): New function. + (cpp_pedwarning_at): New function. + * include/cpplib.h (cpp_warning_at): New decl for rich_location. + (cpp_pedwarning_at): Likewise. + (struct cpp_decoded_char): New. + (struct cpp_char_column_policy): New. + (cpp_display_width_computation::cpp_display_width_computation): + Replace "tabstop" param with "policy". + (cpp_display_width_computation::process_next_codepoint): Add "out" + param. + (cpp_display_width_computation::m_tabstop): Replace with... + (cpp_display_width_computation::m_policy): ...this. + (cpp_byte_column_to_display_column): Replace "tabstop" param with + "policy". + (cpp_display_width): Likewise. + (cpp_display_column_to_byte_column): Likewise. + * include/line-map.h (rich_location::escape_on_output_p): New. + (rich_location::set_escape_on_output): New. + (rich_location::m_escape_on_output): New. + * internal.h (cpp_diagnostic_get_current_location): New decl. + (class encoding_rich_location): New. + * lex.c (skip_whitespace): Use encoding_rich_location when + complaining about null characters. + (warn_about_normalization): Generate a source range when + complaining about improperly normalized tokens, rather than just a + point, and use encoding_rich_location so that the source code + is escaped on printing. + * line-map.c (rich_location::rich_location): Initialize + m_escape_on_output. + +2021-10-29 Tobias Burnus <tobias@codesourcery.com> + + PR c++/102409 + * directives.c (destringize_and_run): Add PRAGMA_OP to the + CPP_PRAGMA token's flags to mark is as coming from _Pragma. + * include/cpplib.h (PRAGMA_OP): #define, to be used with token flags. + * macro.c (collect_args): Only handle CPP_PRAGMA special if PRAGMA_OP + is set. + 2021-10-22 Eric Gallager <egallager@gcc.gnu.org> PR other/102663 @@ -6861,7 +7186,7 @@ * include/symtab.h: Moved from gcc/hashtable.h. Change header guard name. -Copyright (C) 2004-2021 Free Software Foundation, Inc. +Copyright (C) 2004-2022 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright diff --git a/libcpp/Makefile.in b/libcpp/Makefile.in index 34e4206..9e4c3fe 100644 --- a/libcpp/Makefile.in +++ b/libcpp/Makefile.in @@ -1,7 +1,7 @@ # @configure_input@ # Makefile for libcpp. Run 'configure' to generate Makefile from Makefile.in -# Copyright (C) 2004-2021 Free Software Foundation, Inc. +# Copyright (C) 2004-2022 Free Software Foundation, Inc. #This file is part of libcpp. @@ -89,14 +89,14 @@ libcpp_a_OBJS = charset.o directives.o errors.o \ expr.o files.o identifiers.o init.o lex.o line-map.o macro.o \ mkdeps.o pch.o symtab.o traditional.o -libcpp_a_SOURCES = charset.c directives.c errors.c \ - expr.c files.c identifiers.c init.c lex.c line-map.c macro.c \ - mkdeps.c pch.c symtab.c traditional.c +libcpp_a_SOURCES = charset.cc directives.cc errors.cc \ + expr.cc files.cc identifiers.cc init.cc lex.cc line-map.cc macro.cc \ + mkdeps.cc pch.cc symtab.cc traditional.cc all: libcpp.a $(USED_CATALOGS) .SUFFIXES: -.SUFFIXES: .c .gmo .o .obj .po .pox +.SUFFIXES: .cc .gmo .o .obj .po .pox libcpp.a: $(libcpp_a_OBJS) -rm -f libcpp.a @@ -223,7 +223,7 @@ endif # Implicit rules and I18N -.c.o: +.cc.o: $(COMPILE) $< $(POSTCOMPILE) @@ -264,11 +264,13 @@ po/$(PACKAGE).pot: $(libcpp_a_SOURCES) sed 's:$(srcdir)/::g' <po/$(PACKAGE).pot.tmp >po/$(PACKAGE).pot rm po/$(PACKAGE).pot.tmp +ETAGS = @ETAGS@ + TAGS_SOURCES = $(libcpp_a_SOURCES) internal.h system.h ucnid.h \ include/cpplib.h include/line-map.h include/mkdeps.h include/symtab.h TAGS: $(TAGS_SOURCES) - cd $(srcdir) && etags $(TAGS_SOURCES) + cd $(srcdir) && $(ETAGS) $(TAGS_SOURCES) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/libcpp/charset.c b/libcpp/charset.cc index e4e45f6..ca8b7cf 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.cc @@ -1,7 +1,7 @@ /* CPP Library - charsets - Copyright (C) 1998-2021 Free Software Foundation, Inc. + Copyright (C) 1998-2022 Free Software Foundation, Inc. - Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges. + Broken out of c-lex.cc Apr 2003, adding valid C99 UCN ranges. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -955,14 +955,12 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, valid_flags = C99 | CXX | C11 | CXX23; if (CPP_PEDANTIC (pfile)) { - if (CPP_OPTION (pfile, cxx23_identifiers)) + if (CPP_OPTION (pfile, cplusplus)) valid_flags = CXX23; else if (CPP_OPTION (pfile, c11_identifiers)) valid_flags = C11; else if (CPP_OPTION (pfile, c99)) valid_flags = C99; - else if (CPP_OPTION (pfile, cplusplus)) - valid_flags = CXX; } if (! (ucnranges[mn].flags & valid_flags)) return 0; @@ -1021,7 +1019,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, return 2; } - if (CPP_OPTION (pfile, cxx23_identifiers)) + if (CPP_OPTION (pfile, cplusplus)) invalid_start_flags = NXX23; else if (CPP_OPTION (pfile, c11_identifiers)) invalid_start_flags = N11; @@ -1534,7 +1532,6 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, case 'x': return convert_hex (pfile, from, limit, tbuf, cvt, char_range, loc_reader, ranges); - break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': @@ -1582,12 +1579,14 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, "unknown escape sequence: '\\%c'", (int) c); else { - /* diagnostic.c does not support "%03o". When it does, this + encoding_rich_location rich_loc (pfile); + + /* diagnostic.cc does not support "%03o". When it does, this code can use %03o directly in the diagnostic again. */ char buf[32]; sprintf(buf, "%03o", (int) c); - cpp_error (pfile, CPP_DL_PEDWARN, - "unknown escape sequence: '\\%s'", buf); + cpp_error_at (pfile, CPP_DL_PEDWARN, &rich_loc, + "unknown escape sequence: '\\%s'", buf); } } @@ -2345,14 +2344,16 @@ cpp_string_location_reader::get_next () } cpp_display_width_computation:: -cpp_display_width_computation (const char *data, int data_length, int tabstop) : +cpp_display_width_computation (const char *data, int data_length, + const cpp_char_column_policy &policy) : m_begin (data), m_next (m_begin), m_bytes_left (data_length), - m_tabstop (tabstop), + m_policy (policy), m_display_cols (0) { - gcc_assert (m_tabstop > 0); + gcc_assert (policy.m_tabstop > 0); + gcc_assert (policy.m_width_cb); } @@ -2364,19 +2365,28 @@ cpp_display_width_computation (const char *data, int data_length, int tabstop) : point to a valid UTF-8-encoded sequence, then it will be treated as a single byte with display width 1. m_cur_display_col is the current display column, relative to which tab stops should be expanded. Returns the display width of - the codepoint just processed. */ + the codepoint just processed. + If OUT is non-NULL, it is populated. */ int -cpp_display_width_computation::process_next_codepoint () +cpp_display_width_computation::process_next_codepoint (cpp_decoded_char *out) { cppchar_t c; int next_width; + if (out) + out->m_start_byte = m_next; + if (*m_next == '\t') { ++m_next; --m_bytes_left; - next_width = m_tabstop - (m_display_cols % m_tabstop); + next_width = m_policy.m_tabstop - (m_display_cols % m_policy.m_tabstop); + if (out) + { + out->m_ch = '\t'; + out->m_valid_ch = true; + } } else if (one_utf8_to_cppchar ((const uchar **) &m_next, &m_bytes_left, &c) != 0) @@ -2386,14 +2396,24 @@ cpp_display_width_computation::process_next_codepoint () of one. */ ++m_next; --m_bytes_left; - next_width = 1; + next_width = m_policy.m_undecoded_byte_width; + if (out) + out->m_valid_ch = false; } else { /* one_utf8_to_cppchar() has updated m_next and m_bytes_left for us. */ - next_width = cpp_wcwidth (c); + next_width = m_policy.m_width_cb (c); + if (out) + { + out->m_ch = c; + out->m_valid_ch = true; + } } + if (out) + out->m_next_byte = m_next; + m_display_cols += next_width; return next_width; } @@ -2409,7 +2429,7 @@ cpp_display_width_computation::advance_display_cols (int n) const int start = m_display_cols; const int target = start + n; while (m_display_cols < target && !done ()) - process_next_codepoint (); + process_next_codepoint (NULL); return m_display_cols - start; } @@ -2417,29 +2437,33 @@ cpp_display_width_computation::advance_display_cols (int n) how many display columns are occupied by the first COLUMN bytes. COLUMN may exceed DATA_LENGTH, in which case the phantom bytes at the end are treated as if they have display width 1. Tabs are expanded to the next tab - stop, relative to the start of DATA. */ + stop, relative to the start of DATA, and non-printable-ASCII characters + will be escaped as per POLICY. */ int cpp_byte_column_to_display_column (const char *data, int data_length, - int column, int tabstop) + int column, + const cpp_char_column_policy &policy) { const int offset = MAX (0, column - data_length); - cpp_display_width_computation dw (data, column - offset, tabstop); + cpp_display_width_computation dw (data, column - offset, policy); while (!dw.done ()) - dw.process_next_codepoint (); + dw.process_next_codepoint (NULL); return dw.display_cols_processed () + offset; } /* For the string of length DATA_LENGTH bytes that begins at DATA, compute the least number of bytes that will result in at least DISPLAY_COL display columns. The return value may exceed DATA_LENGTH if the entire string does - not occupy enough display columns. */ + not occupy enough display columns. Non-printable-ASCII characters + will be escaped as per POLICY. */ int cpp_display_column_to_byte_column (const char *data, int data_length, - int display_col, int tabstop) + int display_col, + const cpp_char_column_policy &policy) { - cpp_display_width_computation dw (data, data_length, tabstop); + cpp_display_width_computation dw (data, data_length, policy); const int avail_display = dw.advance_display_cols (display_col); return dw.bytes_processed () + MAX (0, display_col - avail_display); } diff --git a/libcpp/config.in b/libcpp/config.in index 89aa6a1..9983aee 100644 --- a/libcpp/config.in +++ b/libcpp/config.in @@ -258,6 +258,12 @@ /* Define to the version of this package. */ #undef PACKAGE_VERSION +/* The size of `dev_t', as computed by sizeof. */ +#undef SIZEOF_DEV_T + +/* The size of `ino_t', as computed by sizeof. */ +#undef SIZEOF_INO_T + /* The size of `int', as computed by sizeof. */ #undef SIZEOF_INT diff --git a/libcpp/configure b/libcpp/configure index 9674cd9..7514539 100755 --- a/libcpp/configure +++ b/libcpp/configure @@ -652,6 +652,7 @@ noexception_flags WARN_PEDANTIC c_warn warn +ETAGS AUTOHEADER AUTOCONF ACLOCAL @@ -4896,6 +4897,11 @@ done test -n "$AUTOHEADER" || AUTOHEADER="$MISSING autoheader" +if test -z "$ETAGS"; then + ETAGS=etags +fi + + # Figure out what compiler warnings we can enable. # See config/warnings.m4 for details. @@ -6638,6 +6644,73 @@ $as_echo "#define HAVE_UCHAR 1" >>confdefs.h fi +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of ino_t" >&5 +$as_echo_n "checking size of ino_t... " >&6; } +if ${ac_cv_sizeof_ino_t+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (ino_t))" "ac_cv_sizeof_ino_t" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_ino_t" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (ino_t) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_ino_t=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_ino_t" >&5 +$as_echo "$ac_cv_sizeof_ino_t" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_INO_T $ac_cv_sizeof_ino_t +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of dev_t" >&5 +$as_echo_n "checking size of dev_t... " >&6; } +if ${ac_cv_sizeof_dev_t+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (dev_t))" "ac_cv_sizeof_dev_t" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_dev_t" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (dev_t) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_dev_t=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_dev_t" >&5 +$as_echo "$ac_cv_sizeof_dev_t" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_DEV_T $ac_cv_sizeof_dev_t +_ACEOF + + + # g++ on Solaris 10+ defines _XOPEN_SOURCE=600, which exposes a different # iconv() prototype. ac_ext=cpp diff --git a/libcpp/configure.ac b/libcpp/configure.ac index 1efa96f..9b60425 100644 --- a/libcpp/configure.ac +++ b/libcpp/configure.ac @@ -22,6 +22,11 @@ AC_CHECK_PROGS([ACLOCAL], [aclocal], [$MISSING aclocal]) AC_CHECK_PROGS([AUTOCONF], [autoconf], [$MISSING autoconf]) AC_CHECK_PROGS([AUTOHEADER], [autoheader], [$MISSING autoheader]) +if test -z "$ETAGS"; then + ETAGS=etags +fi +AC_SUBST([ETAGS]) + # Figure out what compiler warnings we can enable. # See config/warnings.m4 for details. @@ -98,6 +103,9 @@ if test $ac_cv_type_uchar = yes; then [Define if <sys/types.h> defines \`uchar'.]) fi +AC_CHECK_SIZEOF(ino_t) +AC_CHECK_SIZEOF(dev_t) + # g++ on Solaris 10+ defines _XOPEN_SOURCE=600, which exposes a different # iconv() prototype. AC_LANG_PUSH([C++]) diff --git a/libcpp/directives.c b/libcpp/directives.cc index b4bc8b4..f804a44 100644 --- a/libcpp/directives.c +++ b/libcpp/directives.cc @@ -1,5 +1,5 @@ /* CPP Library. (Directive handling.) - Copyright (C) 1986-2021 Free Software Foundation, Inc. + Copyright (C) 1986-2022 Free Software Foundation, Inc. Contributed by Per Bothner, 1994-95. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 @@ -416,7 +416,7 @@ directive_diagnostics (cpp_reader *pfile, const directive *dir, int indented) /* Check if we have a known directive. INDENTED is true if the '#' of the directive was indented. This function is in this file - to save unnecessarily exporting dtable etc. to lex.c. Returns + to save unnecessarily exporting dtable etc. to lex.cc. Returns nonzero if the line of tokens has been handled, zero if we should continue processing the line. */ int @@ -480,7 +480,7 @@ _cpp_handle_directive (cpp_reader *pfile, bool indented) does not cause '#define foo bar' to get executed when compiled with -save-temps, we recognize directives in - -fpreprocessed mode only if the # is in column 1. macro.c + -fpreprocessed mode only if the # is in column 1. macro.cc puts a space in front of any '#' at the start of a macro. We exclude the -fdirectives-only case because macro expansion @@ -632,7 +632,7 @@ lex_macro_node (cpp_reader *pfile, bool is_def_or_undef) return NULL; } -/* Process a #define directive. Most work is done in macro.c. */ +/* Process a #define directive. Most work is done in macro.cc. */ static void do_define (cpp_reader *pfile) { @@ -1907,6 +1907,8 @@ destringize_and_run (cpp_reader *pfile, const cpp_string *in, save_directive = pfile->directive; pfile->directive = &dtable[T_PRAGMA]; do_pragma (pfile); + if (pfile->directive_result.type == CPP_PRAGMA) + pfile->directive_result.flags |= PRAGMA_OP; end_directive (pfile, 1); pfile->directive = save_directive; @@ -1952,8 +1954,7 @@ destringize_and_run (cpp_reader *pfile, const cpp_string *in, else { count = 1; - toks = XNEW (cpp_token); - toks[0] = pfile->directive_result; + toks = &pfile->avoid_paste; /* If we handled the entire pragma internally, make sure we get the line number correct for the next token. */ diff --git a/libcpp/errors.c b/libcpp/errors.cc index 5e1bf33..df5f8d6 100644 --- a/libcpp/errors.c +++ b/libcpp/errors.cc @@ -1,5 +1,5 @@ /* Default error handlers for CPP Library. - Copyright (C) 1986-2021 Free Software Foundation, Inc. + Copyright (C) 1986-2022 Free Software Foundation, Inc. Written by Per Bothner, 1994. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 @@ -27,6 +27,31 @@ along with this program; see the file COPYING3. If not see #include "cpplib.h" #include "internal.h" +/* Get a location_t for the current location in PFILE, + generally that of the previously lexed token. */ + +location_t +cpp_diagnostic_get_current_location (cpp_reader *pfile) +{ + if (CPP_OPTION (pfile, traditional)) + { + if (pfile->state.in_directive) + return pfile->directive_line; + else + return pfile->line_table->highest_line; + } + /* We don't want to refer to a token before the beginning of the + current run -- that is invalid. */ + else if (pfile->cur_token == pfile->cur_run->base) + { + return 0; + } + else + { + return pfile->cur_token[-1].src_loc; + } +} + /* Print a diagnostic at the given location. */ ATTRIBUTE_FPTR_PRINTF(5,0) @@ -52,25 +77,7 @@ cpp_diagnostic (cpp_reader * pfile, enum cpp_diagnostic_level level, enum cpp_warning_reason reason, const char *msgid, va_list *ap) { - location_t src_loc; - - if (CPP_OPTION (pfile, traditional)) - { - if (pfile->state.in_directive) - src_loc = pfile->directive_line; - else - src_loc = pfile->line_table->highest_line; - } - /* We don't want to refer to a token before the beginning of the - current run -- that is invalid. */ - else if (pfile->cur_token == pfile->cur_run->base) - { - src_loc = 0; - } - else - { - src_loc = pfile->cur_token[-1].src_loc; - } + location_t src_loc = cpp_diagnostic_get_current_location (pfile); rich_location richloc (pfile->line_table, src_loc); return cpp_diagnostic_at (pfile, level, reason, &richloc, msgid, ap); } @@ -144,6 +151,43 @@ cpp_warning_syshdr (cpp_reader * pfile, enum cpp_warning_reason reason, return ret; } +/* As cpp_warning above, but use RICHLOC as the location of the diagnostic. */ + +bool cpp_warning_at (cpp_reader *pfile, enum cpp_warning_reason reason, + rich_location *richloc, const char *msgid, ...) +{ + va_list ap; + bool ret; + + va_start (ap, msgid); + + ret = cpp_diagnostic_at (pfile, CPP_DL_WARNING, reason, richloc, + msgid, &ap); + + va_end (ap); + return ret; + +} + +/* As cpp_pedwarning above, but use RICHLOC as the location of the + diagnostic. */ + +bool +cpp_pedwarning_at (cpp_reader * pfile, enum cpp_warning_reason reason, + rich_location *richloc, const char *msgid, ...) +{ + va_list ap; + bool ret; + + va_start (ap, msgid); + + ret = cpp_diagnostic_at (pfile, CPP_DL_PEDWARN, reason, richloc, + msgid, &ap); + + va_end (ap); + return ret; +} + /* Print a diagnostic at a specific location. */ ATTRIBUTE_FPTR_PRINTF(6,0) diff --git a/libcpp/expr.c b/libcpp/expr.cc index ab4a260..78c5c3e 100644 --- a/libcpp/expr.c +++ b/libcpp/expr.cc @@ -1,5 +1,5 @@ /* Parse C expressions for cpplib. - Copyright (C) 1987-2021 Free Software Foundation, Inc. + Copyright (C) 1987-2022 Free Software Foundation, Inc. Contributed by Per Bothner, 1994. This program is free software; you can redistribute it and/or modify it diff --git a/libcpp/files.c b/libcpp/files.cc index c93a03c..24208f7 100644 --- a/libcpp/files.c +++ b/libcpp/files.cc @@ -1,5 +1,5 @@ /* Part of CPP library. File handling. - Copyright (C) 1986-2021 Free Software Foundation, Inc. + Copyright (C) 1986-2022 Free Software Foundation, Inc. Written by Per Bothner, 1994. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 @@ -553,12 +553,11 @@ _cpp_find_file (cpp_reader *pfile, const char *fname, cpp_dir *start_dir, { /* If *hash_slot is NULL, the above htab_find_slot_with_hash call just created the - slot, but we aren't going to store there - anything, so need to remove the newly created - entry. htab_clear_slot requires that it is - non-NULL, so store there some non-NULL pointer, - htab_clear_slot will overwrite it - immediately. */ + slot, but we aren't going to store there anything + of use, so need to remove the newly created entry. + htab_clear_slot requires that it is non-NULL, so + store some non-NULL but valid pointer there, + htab_clear_slot will immediately overwrite it. */ *hash_slot = file; htab_clear_slot (pfile->file_hash, hash_slot); } @@ -582,7 +581,7 @@ _cpp_find_file (cpp_reader *pfile, const char *fname, cpp_dir *start_dir, if (*hash_slot == NULL) { /* See comment on the above htab_clear_slot call. */ - *hash_slot = file; + *hash_slot = &hash_slot; htab_clear_slot (pfile->file_hash, hash_slot); } return NULL; diff --git a/libcpp/identifiers.c b/libcpp/identifiers.cc index dc0df11..835a971 100644 --- a/libcpp/identifiers.c +++ b/libcpp/identifiers.cc @@ -1,5 +1,5 @@ /* Hash tables for the CPP library. - Copyright (C) 1986-2021 Free Software Foundation, Inc. + Copyright (C) 1986-2022 Free Software Foundation, Inc. Written by Per Bothner, 1994. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 6e2fcb6..3eba6f7 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1,5 +1,5 @@ /* Definitions for CPP library. - Copyright (C) 1995-2021 Free Software Foundation, Inc. + Copyright (C) 1995-2022 Free Software Foundation, Inc. Written by Per Bothner, 1994-95. This program is free software; you can redistribute it and/or modify it @@ -46,7 +46,7 @@ struct _cpp_file; '='. The lexer needs operators ending in '=', like ">>=", to be in the same order as their counterparts without the '=', like ">>". - See the cpp_operator table optab in expr.c if you change the order or + See the cpp_operator table optab in expr.cc if you change the order or add or remove anything in the first group. */ #define TTYPE_TABLE \ @@ -192,12 +192,13 @@ struct GTY(()) cpp_string { comment. */ #define BOL (1 << 6) /* Token at beginning of line. */ #define PURE_ZERO (1 << 7) /* Single 0 digit, used by the C++ frontend, - set in c-lex.c. */ + set in c-lex.cc. */ #define SP_DIGRAPH (1 << 8) /* # or ## token was a digraph. */ #define SP_PREV_WHITE (1 << 9) /* If whitespace before a ## operator, or before this token after a # operator. */ #define NO_EXPAND (1 << 10) /* Do not macro-expand this token. */ +#define PRAGMA_OP (1 << 11) /* _Pragma token. */ /* Specify which field, if any, of the cpp_token union is used. */ @@ -318,6 +319,18 @@ enum cpp_main_search CMS_system, /* Search the system INCLUDE path. */ }; +/* The possible bidirectional control characters checking levels. */ +enum cpp_bidirectional_level { + /* No checking. */ + bidirectional_none = 0, + /* Only detect unpaired uses of bidirectional control characters. */ + bidirectional_unpaired = 1, + /* Detect any use of bidirectional control characters. */ + bidirectional_any = 2, + /* Also warn about UCNs. */ + bidirectional_ucn = 4 +}; + /* This structure is nested inside struct cpp_reader, and carries all the options visible to the command line. */ struct cpp_options @@ -479,13 +492,9 @@ struct cpp_options unsigned char ext_numeric_literals; /* Nonzero means extended identifiers allow the characters specified - in C11 and C++11. */ + in C11. */ unsigned char c11_identifiers; - /* Nonzero means extended identifiers allow the characters specified - in C++23. */ - unsigned char cxx23_identifiers; - /* Nonzero for C++ 2014 Standard binary constants. */ unsigned char binary_constants; @@ -538,6 +547,10 @@ struct cpp_options /* True if warn about differences between C++98 and C++11. */ bool cpp_warn_cxx11_compat; + /* Nonzero if bidirectional control characters checking is on. See enum + cpp_bidirectional_level. */ + unsigned char cpp_warn_bidirectional; + /* Dependency generation. */ struct { @@ -642,7 +655,8 @@ enum cpp_warning_reason { CPP_W_C90_C99_COMPAT, CPP_W_C11_C2X_COMPAT, CPP_W_CXX11_COMPAT, - CPP_W_EXPANSION_TO_DEFINED + CPP_W_EXPANSION_TO_DEFINED, + CPP_W_BIDIRECTIONAL }; /* Callback for header lookup for HEADER, which is the name of a @@ -739,10 +753,18 @@ struct cpp_callbacks #ifdef VMS #define INO_T_CPP ino_t ino[3] +#elif defined (_AIX) && SIZEOF_INO_T == 4 +#define INO_T_CPP ino64_t ino #else #define INO_T_CPP ino_t ino #endif +#if defined (_AIX) && SIZEOF_DEV_T == 4 +#define DEV_T_CPP dev64_t dev +#else +#define DEV_T_CPP dev_t dev +#endif + /* Chain of directories to look for include files in. */ struct cpp_dir { @@ -777,7 +799,7 @@ struct cpp_dir /* The C front end uses these to recognize duplicated directories in the search path. */ INO_T_CPP; - dev_t dev; + DEV_T_CPP; }; /* The kind of the cpp_macro. */ @@ -1174,7 +1196,7 @@ extern int cpp_defined (cpp_reader *, const unsigned char *, int); the double integer are set to zero. */ /* This type has to be equal to unsigned HOST_WIDE_INT, see - gcc/c-family/c-lex.c. */ + gcc/c-family/c-lex.cc. */ typedef uint64_t cpp_num_part; typedef struct cpp_num cpp_num; struct cpp_num @@ -1267,6 +1289,14 @@ extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason, const char *msgid, ...) ATTRIBUTE_PRINTF_3; +/* As their counterparts above, but use RICHLOC. */ +extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason, + rich_location *richloc, const char *msgid, ...) + ATTRIBUTE_PRINTF_4; +extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason, + rich_location *richloc, const char *msgid, ...) + ATTRIBUTE_PRINTF_4; + /* Output a diagnostic with "MSGID: " preceding the error string of errno. No location is printed. */ extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level, @@ -1304,7 +1334,7 @@ extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level, rich_location *richloc, const char *msgid, ...) ATTRIBUTE_PRINTF_4; -/* In lex.c */ +/* In lex.cc */ extern int cpp_ideq (const cpp_token *, const char *); extern void cpp_output_line (cpp_reader *, FILE *); extern unsigned char *cpp_output_line_to_string (cpp_reader *, @@ -1361,7 +1391,7 @@ extern cpp_hashnode *cpp_lookup (cpp_reader *, const unsigned char *, typedef int (*cpp_cb) (cpp_reader *, cpp_hashnode *, void *); extern void cpp_forall_identifiers (cpp_reader *, cpp_cb, void *); -/* In macro.c */ +/* In macro.cc */ extern void cpp_scan_nooutput (cpp_reader *); extern int cpp_sys_macro_p (cpp_reader *); extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *, @@ -1369,7 +1399,7 @@ extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *, extern bool cpp_compare_macros (const cpp_macro *macro1, const cpp_macro *macro2); -/* In files.c */ +/* In files.cc */ extern bool cpp_included (cpp_reader *, const char *); extern bool cpp_included_before (cpp_reader *, const char *, location_t); extern void cpp_make_system_header (cpp_reader *, int, int); @@ -1397,7 +1427,7 @@ struct cpp_converted_source cpp_converted_source cpp_get_converted_source (const char *fname, const char *input_charset); -/* In pch.c */ +/* In pch.cc */ struct save_macro_data; extern int cpp_save_state (cpp_reader *, FILE *); extern int cpp_write_pch_deps (cpp_reader *, FILE *); @@ -1407,7 +1437,7 @@ extern void cpp_prepare_state (cpp_reader *, struct save_macro_data **); extern int cpp_read_state (cpp_reader *, const char *, FILE *, struct save_macro_data *); -/* In lex.c */ +/* In lex.cc */ extern void cpp_force_token_locations (cpp_reader *, location_t); extern void cpp_stop_forcing_token_locations (cpp_reader *); enum CPP_DO_task @@ -1423,7 +1453,7 @@ extern void cpp_directive_only_process (cpp_reader *pfile, CPP_DO_task, void *data, ...)); -/* In expr.c */ +/* In expr.cc */ extern enum cpp_ttype cpp_userdef_string_remove_type (enum cpp_ttype type); extern enum cpp_ttype cpp_userdef_string_add_type @@ -1439,45 +1469,97 @@ extern bool cpp_userdef_char_p extern const char * cpp_get_userdef_suffix (const cpp_token *); -/* In charset.c */ +/* In charset.cc */ + +/* The result of attempting to decode a run of UTF-8 bytes. */ + +struct cpp_decoded_char +{ + const char *m_start_byte; + const char *m_next_byte; + + bool m_valid_ch; + cppchar_t m_ch; +}; + +/* Information for mapping between code points and display columns. + + This is a tabstop value, along with a callback for getting the + widths of characters. Normally this callback is cpp_wcwidth, but we + support other schemes for escaping non-ASCII unicode as a series of + ASCII chars when printing the user's source code in diagnostic-show-locus.cc + + For example, consider: + - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80) + - the Unicode character U+1F642 "SLIGHTLY SMILING FACE" + (UTF-8: 0xF0 0x9F 0x99 0x82) + - the byte 0xBF (a stray trailing byte of a UTF-8 character) + Normally U+03C0 would occupy one display column, U+1F642 + would occupy two display columns, and the stray byte would be + printed verbatim as one display column. + + However when escaping them as unicode code points as "<U+03C0>" + and "<U+1F642>" they occupy 8 and 9 display columns respectively, + and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>" + they occupy 8 and 16 display columns respectively. In both cases + the stray byte is escaped to <BF> as 4 display columns. */ + +struct cpp_char_column_policy +{ + cpp_char_column_policy (int tabstop, + int (*width_cb) (cppchar_t c)) + : m_tabstop (tabstop), + m_undecoded_byte_width (1), + m_width_cb (width_cb) + {} + + int m_tabstop; + /* Width in display columns of a stray byte that isn't decodable + as UTF-8. */ + int m_undecoded_byte_width; + int (*m_width_cb) (cppchar_t c); +}; /* A class to manage the state while converting a UTF-8 sequence to cppchar_t and computing the display width one character at a time. */ class cpp_display_width_computation { public: cpp_display_width_computation (const char *data, int data_length, - int tabstop); + const cpp_char_column_policy &policy); const char *next_byte () const { return m_next; } int bytes_processed () const { return m_next - m_begin; } int bytes_left () const { return m_bytes_left; } bool done () const { return !bytes_left (); } int display_cols_processed () const { return m_display_cols; } - int process_next_codepoint (); + int process_next_codepoint (cpp_decoded_char *out); int advance_display_cols (int n); private: const char *const m_begin; const char *m_next; size_t m_bytes_left; - const int m_tabstop; + const cpp_char_column_policy &m_policy; int m_display_cols; }; /* Convenience functions that are simple use cases for class cpp_display_width_computation. Tab characters will be expanded to spaces - as determined by TABSTOP. */ + as determined by POLICY.m_tabstop, and non-printable-ASCII characters + will be escaped as per POLICY. */ int cpp_byte_column_to_display_column (const char *data, int data_length, - int column, int tabstop); + int column, + const cpp_char_column_policy &policy); inline int cpp_display_width (const char *data, int data_length, - int tabstop) + const cpp_char_column_policy &policy) { return cpp_byte_column_to_display_column (data, data_length, data_length, - tabstop); + policy); } int cpp_display_column_to_byte_column (const char *data, int data_length, - int display_col, int tabstop); + int display_col, + const cpp_char_column_policy &policy); int cpp_wcwidth (cppchar_t c); bool cpp_input_conversion_is_trivial (const char *input_charset); diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h index 464494b..8033572 100644 --- a/libcpp/include/line-map.h +++ b/libcpp/include/line-map.h @@ -1,5 +1,5 @@ /* Map (unsigned int) keys to (source file, line, column) triples. - Copyright (C) 2001-2021 Free Software Foundation, Inc. + Copyright (C) 2001-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -83,7 +83,7 @@ enum lc_reason This key only has meaning in relation to a line_maps instance. Within gcc there is a single line_maps instance: "line_table", declared in - gcc/input.h and defined in gcc/input.c. + gcc/input.h and defined in gcc/input.cc. The values of the keys are intended to be internal to libcpp, but for ease-of-understanding the implementation, they are currently @@ -792,6 +792,9 @@ public: /* If true, prints an include trace a la -H. */ bool trace_includes; + /* True if we've seen a #line or # 44 "file" directive. */ + bool seen_line_directive; + /* Highest location_t "given out". */ location_t highest_location; @@ -803,11 +806,11 @@ public: unsigned int max_column_hint; /* The allocator to use when resizing 'maps', defaults to xrealloc. */ - line_map_realloc reallocator; + line_map_realloc GTY((callback)) reallocator; /* The allocators' function used to know the actual size it allocated, for a certain allocation size requested. */ - line_map_round_alloc_size_func round_alloc_size; + line_map_round_alloc_size_func GTY((callback)) round_alloc_size; struct location_adhoc_data_map location_adhoc_data_map; @@ -815,9 +818,6 @@ public: built-in tokens. */ location_t builtin_location; - /* True if we've seen a #line or # 44 "file" directive. */ - bool seen_line_directive; - /* The default value of range_bits in ordinary line maps. */ unsigned int default_range_bits; @@ -1787,6 +1787,18 @@ class rich_location const diagnostic_path *get_path () const { return m_path; } void set_path (const diagnostic_path *path) { m_path = path; } + /* A flag for hinting that the diagnostic involves character encoding + issues, and thus that it will be helpful to the user if we show some + representation of how the characters in the pertinent source lines + are encoded. + The default is false (i.e. do not escape). + When set to true, non-ASCII bytes in the pertinent source lines will + be escaped in a manner controlled by the user-supplied option + -fdiagnostics-escape-format=, so that the user can better understand + what's going on with the encoding in their source file. */ + bool escape_on_output_p () const { return m_escape_on_output; } + void set_escape_on_output (bool flag) { m_escape_on_output = flag; } + private: bool reject_impossible_fixit (location_t where); void stop_supporting_fixits (); @@ -1804,14 +1816,15 @@ protected: int m_column_override; bool m_have_expanded_location; + bool m_seen_impossible_fixit; + bool m_fixits_cannot_be_auto_applied; + bool m_escape_on_output; + expanded_location m_expanded_location; static const int MAX_STATIC_FIXIT_HINTS = 2; semi_embedded_vec <fixit_hint *, MAX_STATIC_FIXIT_HINTS> m_fixit_hints; - bool m_seen_impossible_fixit; - bool m_fixits_cannot_be_auto_applied; - const diagnostic_path *m_path; }; @@ -2093,8 +2106,8 @@ enum location_aspect /* The rich_location class requires a way to expand location_t instances. We would directly use expand_location_to_spelling_point, which is - implemented in gcc/input.c, but we also need to use it for rich_location - within genmatch.c. + implemented in gcc/input.cc, but we also need to use it for rich_location + within genmatch.cc. Hence we require client code of libcpp to implement the following symbol. */ extern expanded_location diff --git a/libcpp/include/mkdeps.h b/libcpp/include/mkdeps.h index 007f74e..96d6464 100644 --- a/libcpp/include/mkdeps.h +++ b/libcpp/include/mkdeps.h @@ -1,5 +1,5 @@ /* Dependency generator for Makefile fragments. - Copyright (C) 2000-2021 Free Software Foundation, Inc. + Copyright (C) 2000-2022 Free Software Foundation, Inc. Contributed by Zack Weinberg, Mar 2000 This program is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ along with this program; see the file COPYING3. If not see #include "cpplib.h" -/* This is the data structure used by all the functions in mkdeps.c. +/* This is the data structure used by all the functions in mkdeps.cc. It's quite straightforward, but should be treated as opaque. */ class mkdeps; diff --git a/libcpp/include/symtab.h b/libcpp/include/symtab.h index 6905753..53efe6c 100644 --- a/libcpp/include/symtab.h +++ b/libcpp/include/symtab.h @@ -1,5 +1,5 @@ /* Hash tables. - Copyright (C) 2000-2021 Free Software Foundation, Inc. + Copyright (C) 2000-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the diff --git a/libcpp/init.c b/libcpp/init.cc index 5a424e2..f4ab83d 100644 --- a/libcpp/init.c +++ b/libcpp/init.cc @@ -1,5 +1,5 @@ /* CPP Library. - Copyright (C) 1986-2021 Free Software Foundation, Inc. + Copyright (C) 1986-2022 Free Software Foundation, Inc. Contributed by Per Bothner, 1994-95. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 @@ -82,7 +82,6 @@ struct lang_flags char extended_numbers; char extended_identifiers; char c11_identifiers; - char cxx23_identifiers; char std; char digraphs; char uliterals; @@ -100,31 +99,31 @@ struct lang_flags }; static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid c11 c++23 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX98 */ { 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX11 */ { 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX14 */ { 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* CXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0 }, - /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* CXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* GNUCXX23 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 }, - /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 }, - /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */ + /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, + /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, + /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, + /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, + /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1 }, + /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, + /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, + /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, + /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, + /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, + /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 }, + /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, + /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, + /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, + /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, + /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 }, + /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, + /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 }, + /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0 }, + /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 }, + /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 }, + /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 }, + /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 }, + /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; /* Sets internal flags correctly for a given language. */ @@ -140,7 +139,6 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang) CPP_OPTION (pfile, extended_numbers) = l->extended_numbers; CPP_OPTION (pfile, extended_identifiers) = l->extended_identifiers; CPP_OPTION (pfile, c11_identifiers) = l->c11_identifiers; - CPP_OPTION (pfile, cxx23_identifiers) = l->cxx23_identifiers; CPP_OPTION (pfile, std) = l->std; CPP_OPTION (pfile, digraphs) = l->digraphs; CPP_OPTION (pfile, uliterals) = l->uliterals; @@ -223,6 +221,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, = ENABLE_CANONICAL_SYSTEM_HEADERS; CPP_OPTION (pfile, ext_numeric_literals) = 1; CPP_OPTION (pfile, warn_date_time) = 0; + CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; /* Default CPP arithmetic to something sensible for the host for the benefit of dumb users like fix-header. */ @@ -383,7 +382,7 @@ cpp_destroy (cpp_reader *pfile) There are two tables of these. builtin_array holds all the "builtin" macros: these are handled by builtin_macro() in - macro.c. Builtin is somewhat of a misnomer -- the property of + macro.cc. Builtin is somewhat of a misnomer -- the property of interest is that these macros require special code to compute their expansions. The value is a "cpp_builtin_type" enumerator. @@ -413,7 +412,7 @@ static const struct builtin_macro builtin_array[] = B("__INCLUDE_LEVEL__", BT_INCLUDE_LEVEL, true), B("__COUNTER__", BT_COUNTER, true), /* Make sure to update the list of built-in - function-like macros in traditional.c: + function-like macros in traditional.cc: fun_like_macro() when adding more following */ B("__has_attribute", BT_HAS_ATTRIBUTE, true), B("__has_c_attribute", BT_HAS_STD_ATTRIBUTE, true), @@ -849,7 +848,7 @@ cpp_finish (cpp_reader *pfile, FILE *deps_stream) if (CPP_OPTION (pfile, warn_unused_macros)) cpp_forall_identifiers (pfile, _cpp_warn_if_unused_macro, NULL); - /* lex.c leaves the final buffer on the stack. This it so that + /* lex.cc leaves the final buffer on the stack. This it so that it returns an unending stream of CPP_EOFs to the client. If we popped the buffer, we'd dereference a NULL buffer pointer and segfault. It's nice to allow the client to do worry-free excess diff --git a/libcpp/internal.h b/libcpp/internal.h index fd44de6..badfd1b 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -1,5 +1,5 @@ /* Part of CPP library. - Copyright (C) 1997-2021 Free Software Foundation, Inc. + Copyright (C) 1997-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -287,6 +287,9 @@ struct lexer_state /* Nonzero if the deferred pragma being handled allows macro expansion. */ unsigned char pragma_allow_expansion; + + /* Nonzero if _Pragma should not be interpreted. */ + unsigned char ignore__Pragma; }; /* Special nodes - identifiers with predefined significance. */ @@ -531,15 +534,15 @@ struct cpp_reader cpp_token avoid_paste; cpp_token endarg; - /* Opaque handle to the dependencies of mkdeps.c. */ + /* Opaque handle to the dependencies of mkdeps.cc. */ class mkdeps *deps; /* Obstack holding all macro hash nodes. This never shrinks. - See identifiers.c */ + See identifiers.cc */ struct obstack hash_ob; /* Obstack holding buffer and conditional structures. This is a - real stack. See directives.c. */ + real stack. See directives.cc. */ struct obstack buffer_ob; /* Pragma table - dynamic, because a library user can add to the @@ -574,7 +577,7 @@ struct cpp_reader location_t first_line; } out; - /* Used for buffer overlays by traditional.c. */ + /* Used for buffer overlays by traditional.cc. */ const unsigned char *saved_cur, *saved_rlimit, *saved_line_base; /* A saved list of the defined macros, for dependency checking @@ -597,6 +600,14 @@ struct cpp_reader /* Location identifying the main source file -- intended to be line zero of said file. */ location_t main_loc; + + /* Returns true iff we should warn about UTF-8 bidirectional control + characters. */ + bool warn_bidi_p () const + { + return (CPP_OPTION (this, cpp_warn_bidirectional) + & (bidirectional_unpaired|bidirectional_any)); + } }; /* Character classes. Based on the more primitive macros in safe-ctype.h. @@ -604,7 +615,7 @@ struct cpp_reader definition of a pp-number in the C standard [section 6.4.8 of C99]. In the unlikely event that characters other than \r and \n enter - the set is_vspace, the macro handle_newline() in lex.c must be + the set is_vspace, the macro handle_newline() in lex.cc must be updated. */ #define _dollar_ok(x) ((x) == '$' && CPP_OPTION (pfile, dollars_in_ident)) @@ -663,7 +674,7 @@ inline bool _cpp_defined_macro_p (cpp_hashnode *node) return cpp_macro_p (node) && !(node->flags & NODE_CONDITIONAL); } -/* In macro.c */ +/* In macro.cc */ extern bool _cpp_notify_macro_use (cpp_reader *pfile, cpp_hashnode *node, location_t); inline bool _cpp_maybe_notify_macro_use (cpp_reader *pfile, cpp_hashnode *node, @@ -692,11 +703,11 @@ extern void _cpp_push_token_context (cpp_reader *, cpp_hashnode *, const cpp_token *, unsigned int); extern void _cpp_backup_tokens_direct (cpp_reader *, unsigned int); -/* In identifiers.c */ +/* In identifiers.cc */ extern void _cpp_init_hashtable (cpp_reader *, cpp_hash_table *); extern void _cpp_destroy_hashtable (cpp_reader *); -/* In files.c */ +/* In files.cc */ enum _cpp_find_file_kind { _cpp_FFK_NORMAL, _cpp_FFK_FAKE, _cpp_FFK_PRE_INCLUDE, _cpp_FFK_HAS_INCLUDE }; extern _cpp_file *_cpp_find_file (cpp_reader *, const char *, cpp_dir *, @@ -722,11 +733,11 @@ extern struct stat *_cpp_get_file_stat (_cpp_file *); extern bool _cpp_has_header (cpp_reader *, const char *, int, enum include_type); -/* In expr.c */ +/* In expr.cc */ extern bool _cpp_parse_expr (cpp_reader *, bool); extern struct op *_cpp_expand_op_stack (cpp_reader *); -/* In lex.c */ +/* In lex.cc */ extern void _cpp_process_line_notes (cpp_reader *, int); extern void _cpp_clean_line (cpp_reader *); extern bool _cpp_get_fresh_line (cpp_reader *); @@ -749,13 +760,13 @@ static inline void *_cpp_reserve_room (cpp_reader *pfile, size_t have, } extern void *_cpp_commit_buff (cpp_reader *pfile, size_t size); -/* In init.c. */ +/* In init.cc. */ extern void _cpp_maybe_push_include_file (cpp_reader *); extern const char *cpp_named_operator2name (enum cpp_ttype type); extern void _cpp_restore_special_builtin (cpp_reader *pfile, struct def_pragma_macro *); -/* In directives.c */ +/* In directives.cc */ extern int _cpp_test_assertion (cpp_reader *, unsigned int *); extern int _cpp_handle_directive (cpp_reader *, bool); extern void _cpp_define_builtin (cpp_reader *, const char *); @@ -769,7 +780,10 @@ extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *, extern void _cpp_pop_buffer (cpp_reader *); extern char *_cpp_bracket_include (cpp_reader *); -/* In traditional.c. */ +/* In errors.cc */ +extern location_t cpp_diagnostic_get_current_location (cpp_reader *); + +/* In traditional.cc. */ extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *, bool); extern bool _cpp_read_logical_line_trad (cpp_reader *); extern void _cpp_overlay_buffer (cpp_reader *pfile, const unsigned char *, @@ -782,7 +796,7 @@ extern unsigned char *_cpp_copy_replacement_text (const cpp_macro *, unsigned char *); extern size_t _cpp_replacement_text_len (const cpp_macro *); -/* In charset.c. */ +/* In charset.cc. */ /* The normalization state at this point in the sequence. It starts initialized to all zeros, and at the end @@ -887,7 +901,7 @@ ufputs (const unsigned char *s, FILE *f) return fputs ((const char *)s, f); } -/* In line-map.c. */ +/* In line-map.cc. */ /* Create and return a virtual location for a token that is part of a macro expansion-list at a macro expansion point. See the comment @@ -935,6 +949,26 @@ int linemap_get_expansion_line (class line_maps *, const char* linemap_get_expansion_filename (class line_maps *, location_t); +/* A subclass of rich_location for emitting a diagnostic + at the current location of the reader, but flagging + it with set_escape_on_output (true). */ +class encoding_rich_location : public rich_location +{ + public: + encoding_rich_location (cpp_reader *pfile) + : rich_location (pfile->line_table, + cpp_diagnostic_get_current_location (pfile)) + { + set_escape_on_output (true); + } + + encoding_rich_location (cpp_reader *pfile, location_t loc) + : rich_location (pfile->line_table, loc) + { + set_escape_on_output (true); + } +}; + #ifdef __cplusplus } #endif diff --git a/libcpp/lex.c b/libcpp/lex.cc index 8e3ef09..fb1dfab 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.cc @@ -1,5 +1,5 @@ /* CPP Library - lexical analysis. - Copyright (C) 2000-2021 Free Software Foundation, Inc. + Copyright (C) 2000-2022 Free Software Foundation, Inc. Contributed by Per Bothner, 1994-95. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 @@ -1164,6 +1164,479 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment) } } +namespace bidi { + enum class kind { + NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL + }; + + /* All the UTF-8 encodings of bidi characters start with E2. */ + constexpr uchar utf8_start = 0xe2; + + struct context + { + context () {} + context (location_t loc, kind k, bool pdf, bool ucn) + : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn) + { + } + + kind get_pop_kind () const + { + return m_pdf ? kind::PDF : kind::PDI; + } + bool ucn_p () const + { + return m_ucn; + } + + location_t m_loc; + kind m_kind; + unsigned m_pdf : 1; + unsigned m_ucn : 1; + }; + + /* A vector holding currently open bidi contexts. We use a char for + each context, its LSB is 1 if it represents a PDF context, 0 if it + represents a PDI context. The next bit is 1 if this context was open + by a bidi character written as a UCN, and 0 when it was UTF-8. */ + semi_embedded_vec <context, 16> vec; + + /* Close the whole comment/identifier/string literal/character constant + context. */ + void on_close () + { + vec.truncate (0); + } + + /* Pop the last element in the vector. */ + void pop () + { + unsigned int len = vec.count (); + gcc_checking_assert (len > 0); + vec.truncate (len - 1); + } + + /* Return the pop kind of the context of the Ith element. */ + kind pop_kind_at (unsigned int i) + { + return vec[i].get_pop_kind (); + } + + /* Return the pop kind of the context that is currently opened. */ + kind current_ctx () + { + unsigned int len = vec.count (); + if (len == 0) + return kind::NONE; + return vec[len - 1].get_pop_kind (); + } + + /* Return true if the current context comes from a UCN origin, that is, + the bidi char which started this bidi context was written as a UCN. */ + bool current_ctx_ucn_p () + { + unsigned int len = vec.count (); + gcc_checking_assert (len > 0); + return vec[len - 1].m_ucn; + } + + location_t current_ctx_loc () + { + unsigned int len = vec.count (); + gcc_checking_assert (len > 0); + return vec[len - 1].m_loc; + } + + /* We've read a bidi char, update the current vector as necessary. + LOC is only valid when K is not kind::NONE. */ + void on_char (kind k, bool ucn_p, location_t loc) + { + switch (k) + { + case kind::LRE: + case kind::RLE: + case kind::LRO: + case kind::RLO: + vec.push (context (loc, k, true, ucn_p)); + break; + case kind::LRI: + case kind::RLI: + case kind::FSI: + vec.push (context (loc, k, false, ucn_p)); + break; + /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO + whose scope has not yet been terminated. */ + case kind::PDF: + if (current_ctx () == kind::PDF) + pop (); + break; + /* PDI terminates the scope of the last LRI, RLI, or FSI whose + scope has not yet been terminated, as well as the scopes of + any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not + yet been terminated. */ + case kind::PDI: + for (int i = vec.count () - 1; i >= 0; --i) + if (pop_kind_at (i) == kind::PDI) + { + vec.truncate (i); + break; + } + break; + case kind::LTR: + case kind::RTL: + /* These aren't popped by a PDF/PDI. */ + break; + ATTR_LIKELY case kind::NONE: + break; + default: + abort (); + } + } + + /* Return a descriptive string for K. */ + const char *to_str (kind k) + { + switch (k) + { + case kind::LRE: + return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; + case kind::RLE: + return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; + case kind::LRO: + return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; + case kind::RLO: + return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; + case kind::LRI: + return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; + case kind::RLI: + return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; + case kind::FSI: + return "U+2068 (FIRST STRONG ISOLATE)"; + case kind::PDF: + return "U+202C (POP DIRECTIONAL FORMATTING)"; + case kind::PDI: + return "U+2069 (POP DIRECTIONAL ISOLATE)"; + case kind::LTR: + return "U+200E (LEFT-TO-RIGHT MARK)"; + case kind::RTL: + return "U+200F (RIGHT-TO-LEFT MARK)"; + default: + abort (); + } + } +} + +/* Get location_t for the range of bytes [START, START + NUM_BYTES) + within the current line in FILE, with the caret at START. */ + +static location_t +get_location_for_byte_range_in_cur_line (cpp_reader *pfile, + const unsigned char *const start, + size_t num_bytes) +{ + gcc_checking_assert (num_bytes > 0); + + /* CPP_BUF_COLUMN and linemap_position_for_column both refer + to offsets in bytes, but CPP_BUF_COLUMN is 0-based, + whereas linemap_position_for_column is 1-based. */ + + /* Get 0-based offsets within the line. */ + size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start); + size_t end_offset = start_offset + num_bytes - 1; + + /* Now convert to location_t, where "columns" are 1-based byte offsets. */ + location_t start_loc = linemap_position_for_column (pfile->line_table, + start_offset + 1); + location_t end_loc = linemap_position_for_column (pfile->line_table, + end_offset + 1); + + if (start_loc == end_loc) + return start_loc; + + source_range src_range; + src_range.m_start = start_loc; + src_range.m_finish = end_loc; + location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table, + start_loc, + src_range, + NULL); + return combined_loc; +} + +/* Parse a sequence of 3 bytes starting with P and return its bidi code. */ + +static bidi::kind +get_bidi_utf8_1 (const unsigned char *const p) +{ + gcc_checking_assert (p[0] == bidi::utf8_start); + + if (p[1] == 0x80) + switch (p[2]) + { + case 0xaa: + return bidi::kind::LRE; + case 0xab: + return bidi::kind::RLE; + case 0xac: + return bidi::kind::PDF; + case 0xad: + return bidi::kind::LRO; + case 0xae: + return bidi::kind::RLO; + case 0x8e: + return bidi::kind::LTR; + case 0x8f: + return bidi::kind::RTL; + default: + break; + } + else if (p[1] == 0x81) + switch (p[2]) + { + case 0xa6: + return bidi::kind::LRI; + case 0xa7: + return bidi::kind::RLI; + case 0xa8: + return bidi::kind::FSI; + case 0xa9: + return bidi::kind::PDI; + default: + break; + } + + return bidi::kind::NONE; +} + +/* Parse a sequence of 3 bytes starting with P and return its bidi code. + If the kind is not NONE, write the location to *OUT.*/ + +static bidi::kind +get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out) +{ + bidi::kind result = get_bidi_utf8_1 (p); + if (result != bidi::kind::NONE) + { + /* We have a sequence of 3 bytes starting at P. */ + *out = get_location_for_byte_range_in_cur_line (pfile, p, 3); + } + return result; +} + +/* Parse a UCN where P points just past \u or \U and return its bidi code. */ + +static bidi::kind +get_bidi_ucn_1 (const unsigned char *p, bool is_U) +{ + /* 6.4.3 Universal Character Names + \u hex-quad + \U hex-quad hex-quad + where \unnnn means \U0000nnnn. */ + + if (is_U) + { + if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') + return bidi::kind::NONE; + /* Skip 4B so we can treat \u and \U the same below. */ + p += 4; + } + + /* All code points we are looking for start with 20xx. */ + if (p[0] != '2' || p[1] != '0') + return bidi::kind::NONE; + else if (p[2] == '2') + switch (p[3]) + { + case 'a': + case 'A': + return bidi::kind::LRE; + case 'b': + case 'B': + return bidi::kind::RLE; + case 'c': + case 'C': + return bidi::kind::PDF; + case 'd': + case 'D': + return bidi::kind::LRO; + case 'e': + case 'E': + return bidi::kind::RLO; + default: + break; + } + else if (p[2] == '6') + switch (p[3]) + { + case '6': + return bidi::kind::LRI; + case '7': + return bidi::kind::RLI; + case '8': + return bidi::kind::FSI; + case '9': + return bidi::kind::PDI; + default: + break; + } + else if (p[2] == '0') + switch (p[3]) + { + case 'e': + case 'E': + return bidi::kind::LTR; + case 'f': + case 'F': + return bidi::kind::RTL; + default: + break; + } + + return bidi::kind::NONE; +} + +/* Parse a UCN where P points just past \u or \U and return its bidi code. + If the kind is not NONE, write the location to *OUT.*/ + +static bidi::kind +get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U, + location_t *out) +{ + bidi::kind result = get_bidi_ucn_1 (p, is_U); + if (result != bidi::kind::NONE) + { + const unsigned char *start = p - 2; + size_t num_bytes = 2 + (is_U ? 8 : 4); + *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes); + } + return result; +} + +/* Subclass of rich_location for reporting on unpaired UTF-8 + bidirectional control character(s). + Escape the source lines on output, and show all unclosed + bidi context, labelling everything. */ + +class unpaired_bidi_rich_location : public rich_location +{ + public: + class custom_range_label : public range_label + { + public: + label_text get_text (unsigned range_idx) const FINAL OVERRIDE + { + /* range 0 is the primary location; each subsequent range i + 1 + is for bidi::vec[i]. */ + if (range_idx > 0) + { + const bidi::context &ctxt (bidi::vec[range_idx - 1]); + return label_text::borrow (bidi::to_str (ctxt.m_kind)); + } + else + return label_text::borrow (_("end of bidirectional context")); + } + }; + + unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc) + : rich_location (pfile->line_table, loc, &m_custom_label) + { + set_escape_on_output (true); + for (unsigned i = 0; i < bidi::vec.count (); i++) + add_range (bidi::vec[i].m_loc, + SHOW_RANGE_WITHOUT_CARET, + &m_custom_label); + } + + private: + custom_range_label m_custom_label; +}; + +/* We're closing a bidi context, that is, we've encountered a newline, + are closing a C-style comment, or are at the end of a string literal, + character constant, or identifier. Warn if this context was not + properly terminated by a PDI or PDF. P points to the last character + in this context. */ + +static void +maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) +{ + const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); + if (bidi::vec.count () > 0 + && (warn_bidi & bidirectional_unpaired + && (!bidi::current_ctx_ucn_p () + || (warn_bidi & bidirectional_ucn)))) + { + const location_t loc + = linemap_position_for_column (pfile->line_table, + CPP_BUF_COLUMN (pfile->buffer, p)); + unpaired_bidi_rich_location rich_loc (pfile, loc); + /* cpp_callbacks doesn't yet have a way to handle singular vs plural + forms of a diagnostic, so fake it for now. */ + if (bidi::vec.count () > 1) + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "unpaired UTF-8 bidirectional control characters " + "detected"); + else + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "unpaired UTF-8 bidirectional control character " + "detected"); + } + /* We're done with this context. */ + bidi::on_close (); +} + +/* We're at the beginning or in the middle of an identifier/comment/string + literal/character constant. Warn if we've encountered a bidi character. + KIND says which bidi control character it was; UCN_P is true iff this bidi + control character was written as a UCN. LOC is the location of the + character, but is only valid if KIND != bidi::kind::NONE. */ + +static void +maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind, + bool ucn_p, location_t loc) +{ + if (__builtin_expect (kind == bidi::kind::NONE, 1)) + return; + + const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); + + if (warn_bidi & (bidirectional_unpaired|bidirectional_any)) + { + rich_location rich_loc (pfile->line_table, loc); + rich_loc.set_escape_on_output (true); + + /* It seems excessive to warn about a PDI/PDF that is closing + an opened context because we've already warned about the + opening character. Except warn when we have a UCN x UTF-8 + mismatch, if UCN checking is enabled. */ + if (kind == bidi::current_ctx ()) + { + if (warn_bidi == (bidirectional_unpaired|bidirectional_ucn) + && bidi::current_ctx_ucn_p () != ucn_p) + { + rich_loc.add_range (bidi::current_ctx_loc ()); + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "UTF-8 vs UCN mismatch when closing " + "a context by \"%s\"", bidi::to_str (kind)); + } + } + else if (warn_bidi & bidirectional_any + && (!ucn_p || (warn_bidi & bidirectional_ucn))) + { + if (kind == bidi::kind::PDF || kind == bidi::kind::PDI) + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "\"%s\" is closing an unopened context", + bidi::to_str (kind)); + else + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "found problematic Unicode character \"%s\"", + bidi::to_str (kind)); + } + } + /* We're done with this context. */ + bidi::on_char (kind, ucn_p, loc); +} + /* Skip a C-style block comment. We find the end of the comment by seeing if an asterisk is before every '/' we encounter. Returns nonzero if comment terminated by EOF, zero otherwise. @@ -1175,6 +1648,7 @@ _cpp_skip_block_comment (cpp_reader *pfile) cpp_buffer *buffer = pfile->buffer; const uchar *cur = buffer->cur; uchar c; + const bool warn_bidi_p = pfile->warn_bidi_p (); cur++; if (*cur == '/') @@ -1189,7 +1663,11 @@ _cpp_skip_block_comment (cpp_reader *pfile) if (c == '/') { if (cur[-2] == '*') - break; + { + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, cur); + break; + } /* Warn about potential nested comments, but not if the '/' comes immediately before the true comment delimiter. @@ -1208,6 +1686,8 @@ _cpp_skip_block_comment (cpp_reader *pfile) { unsigned int cols; buffer->cur = cur - 1; + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, cur); _cpp_process_line_notes (pfile, true); if (buffer->next_line >= buffer->rlimit) return true; @@ -1218,6 +1698,14 @@ _cpp_skip_block_comment (cpp_reader *pfile) cur = buffer->cur; } + /* If this is a beginning of a UTF-8 encoding, it might be + a bidirectional control character. */ + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) + { + location_t loc; + bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc); + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); + } } buffer->cur = cur; @@ -1233,9 +1721,31 @@ skip_line_comment (cpp_reader *pfile) { cpp_buffer *buffer = pfile->buffer; location_t orig_line = pfile->line_table->highest_line; + const bool warn_bidi_p = pfile->warn_bidi_p (); - while (*buffer->cur != '\n') - buffer->cur++; + if (!warn_bidi_p) + while (*buffer->cur != '\n') + buffer->cur++; + else + { + while (*buffer->cur != '\n' + && *buffer->cur != bidi::utf8_start) + buffer->cur++; + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) + { + while (*buffer->cur != '\n') + { + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) + { + location_t loc; + bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc); + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); + } + buffer->cur++; + } + maybe_warn_bidi_on_close (pfile, buffer->cur); + } + } _cpp_process_line_notes (pfile, true); return orig_line != pfile->line_table->highest_line; @@ -1268,7 +1778,11 @@ skip_whitespace (cpp_reader *pfile, cppchar_t c) while (is_nvspace (c)); if (saw_NUL) - cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); + { + encoding_rich_location rich_loc (pfile); + cpp_error_at (pfile, CPP_DL_WARNING, &rich_loc, + "null character(s) ignored"); + } buffer->cur--; } @@ -1297,6 +1811,28 @@ warn_about_normalization (cpp_reader *pfile, if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s) && !pfile->state.skipping) { + location_t loc = token->src_loc; + + /* If possible, create a location range for the token. */ + if (loc >= RESERVED_LOCATION_COUNT + && token->type != CPP_EOF + /* There must be no line notes to process. */ + && (!(pfile->buffer->cur + >= pfile->buffer->notes[pfile->buffer->cur_note].pos + && !pfile->overlaid_buffer))) + { + source_range tok_range; + tok_range.m_start = loc; + tok_range.m_finish + = linemap_position_for_column (pfile->line_table, + CPP_BUF_COLUMN (pfile->buffer, + pfile->buffer->cur)); + loc = COMBINE_LOCATION_DATA (pfile->line_table, + loc, tok_range, NULL); + } + + encoding_rich_location rich_loc (pfile, loc); + /* Make sure that the token is printed using UCNs, even if we'd otherwise happily print UTF-8. */ unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token)); @@ -1304,14 +1840,14 @@ warn_about_normalization (cpp_reader *pfile, sz = cpp_spell_token (pfile, token, buf, false) - buf; if (NORMALIZE_STATE_RESULT (s) == normalized_C) - cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0, - "`%.*s' is not in NFKC", (int) sz, buf); - else if (CPP_OPTION (pfile, cxx23_identifiers)) - cpp_pedwarning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0, + cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc, + "`%.*s' is not in NFKC", (int) sz, buf); + else if (CPP_OPTION (pfile, cplusplus)) + cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc, "`%.*s' is not in NFC", (int) sz, buf); else - cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0, - "`%.*s' is not in NFC", (int) sz, buf); + cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc, + "`%.*s' is not in NFC", (int) sz, buf); free (buf); } } @@ -1320,11 +1856,13 @@ static const cppchar_t utf8_signifier = 0xC0; /* Returns TRUE if the sequence starting at buffer->cur is valid in an identifier. FIRST is TRUE if this starts an identifier. */ + static bool forms_identifier_p (cpp_reader *pfile, int first, struct normalize_state *state) { cpp_buffer *buffer = pfile->buffer; + const bool warn_bidi_p = pfile->warn_bidi_p (); if (*buffer->cur == '$') { @@ -1347,6 +1885,13 @@ forms_identifier_p (cpp_reader *pfile, int first, cppchar_t s; if (*buffer->cur >= utf8_signifier) { + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) + && warn_bidi_p) + { + location_t loc; + bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc); + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); + } if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, state, &s)) return true; @@ -1355,6 +1900,15 @@ forms_identifier_p (cpp_reader *pfile, int first, && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) { buffer->cur += 2; + if (warn_bidi_p) + { + location_t loc; + bidi::kind kind = get_bidi_ucn (pfile, + buffer->cur, + buffer->cur[-1] == 'U', + &loc); + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc); + } if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, state, &s, NULL, NULL)) return true; @@ -1463,6 +2017,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, const uchar *cur; unsigned int len; unsigned int hash = HT_HASHSTEP (0, *base); + const bool warn_bidi_p = pfile->warn_bidi_p (); cur = pfile->buffer->cur; if (! starts_ucn) @@ -1486,6 +2041,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, pfile->buffer->cur++; } } while (forms_identifier_p (pfile, false, nst)); + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); result = _cpp_interpret_identifier (pfile, base, pfile->buffer->cur - base); *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); @@ -1732,6 +2289,7 @@ static void lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) { const uchar *pos = base; + const bool warn_bidi_p = pfile->warn_bidi_p (); /* 'tis a pity this information isn't passed down from the lexer's initial categorization of the token. */ @@ -1968,8 +2526,18 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) pos = base = pfile->buffer->cur; note = &pfile->buffer->notes[pfile->buffer->cur_note]; } + else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) + && warn_bidi_p) + { + location_t loc; + bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc); + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); + } } + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, pos); + if (CPP_OPTION (pfile, user_literals)) { /* If a string format macro, say from inttypes.h, is placed touching @@ -2064,15 +2632,29 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) else terminator = '>', type = CPP_HEADER_NAME; + const bool warn_bidi_p = pfile->warn_bidi_p (); for (;;) { cppchar_t c = *cur++; /* In #include-style directives, terminators are not escapable. */ if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') - cur++; + { + if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) + { + location_t loc; + bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U', + &loc); + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc); + } + cur++; + } else if (c == terminator) - break; + { + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, cur - 1); + break; + } else if (c == '\n') { cur--; @@ -2089,6 +2671,12 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) } else if (c == '\0') saw_NUL = true; + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) + { + location_t loc; + bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc); + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); + } } if (saw_NUL && !pfile->state.skipping) @@ -2946,7 +3534,21 @@ _cpp_lex_direct (cpp_reader *pfile) buffer = pfile->buffer; if (buffer->need_line) { - gcc_assert (!pfile->state.in_deferred_pragma); + if (pfile->state.in_deferred_pragma) + { + /* This can happen in cases like: + #define loop(x) whatever + #pragma omp loop + where when trying to expand loop we need to peek + next token after loop, but aren't still in_deferred_pragma + mode but are in in_directive mode, so buffer->need_line + is set, a CPP_EOF is peeked. */ + result->type = CPP_PRAGMA_EOL; + pfile->state.in_deferred_pragma = false; + if (!pfile->state.pragma_allow_expansion) + pfile->state.prevent_expansion--; + return result; + } if (!_cpp_get_fresh_line (pfile)) { result->type = CPP_EOF; @@ -4467,7 +5069,7 @@ cpp_directive_only_process (cpp_reader *pfile, break; case '*': - if (pos > peek && !esc) + if (pos > peek) star = is_block; esc = false; break; diff --git a/libcpp/line-map.c b/libcpp/line-map.cc index 1a6902a..62077c3 100644 --- a/libcpp/line-map.c +++ b/libcpp/line-map.cc @@ -1,5 +1,5 @@ /* Map (unsigned int) keys to (source file, line, column) triples. - Copyright (C) 2001-2021 Free Software Foundation, Inc. + Copyright (C) 2001-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -44,7 +44,7 @@ static location_t linemap_macro_loc_to_exp_point (line_maps *, location_t, const line_map_ordinary **); -/* Counters defined in macro.c. */ +/* Counters defined in macro.cc. */ extern unsigned num_expanded_macros_counter; extern unsigned num_macro_tokens_counter; @@ -374,7 +374,7 @@ linemap_check_files_exited (line_maps *set) for (const line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (set); ! MAIN_FILE_P (map); map = linemap_included_from_linemap (set, map)) - fprintf (stderr, "line-map.c: file \"%s\" entered but not left\n", + fprintf (stderr, "line-map.cc: file \"%s\" entered but not left\n", ORDINARY_MAP_FILE_NAME (map)); } @@ -410,7 +410,7 @@ line_map_new_raw (line_maps *set, bool macro_p, unsigned num) /* We are going to execute some dance to try to reduce the overhead of the memory allocator, in case we are using the - ggc-page.c one. + ggc-page.cc one. The actual size of memory we are going to get back from the allocator may well be larger than what we ask for. Use this @@ -2083,9 +2083,10 @@ rich_location::rich_location (line_maps *set, location_t loc, m_ranges (), m_column_override (0), m_have_expanded_location (false), - m_fixit_hints (), m_seen_impossible_fixit (false), m_fixits_cannot_be_auto_applied (false), + m_escape_on_output (false), + m_fixit_hints (), m_path (NULL) { add_range (loc, SHOW_RANGE_WITH_CARET, label); diff --git a/libcpp/macro.c b/libcpp/macro.cc index f214548..8ebf360 100644 --- a/libcpp/macro.c +++ b/libcpp/macro.cc @@ -1,5 +1,5 @@ /* Part of CPP library. (Macro and #define handling.) - Copyright (C) 1986-2021 Free Software Foundation, Inc. + Copyright (C) 1986-2022 Free Software Foundation, Inc. Written by Per Bothner, 1994. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 @@ -295,7 +295,7 @@ static cpp_context *next_context (cpp_reader *); static const cpp_token *padding_token (cpp_reader *, const cpp_token *); static const cpp_token *new_string_token (cpp_reader *, uchar *, unsigned int); static const cpp_token *stringify_arg (cpp_reader *, const cpp_token **, - unsigned int, bool); + unsigned int); static void paste_all_tokens (cpp_reader *, const cpp_token *); static bool paste_tokens (cpp_reader *, location_t, const cpp_token **, const cpp_token *); @@ -750,8 +750,10 @@ builtin_macro (cpp_reader *pfile, cpp_hashnode *node, if (node->value.builtin == BT_PRAGMA) { /* Don't interpret _Pragma within directives. The standard is - not clear on this, but to me this makes most sense. */ - if (pfile->state.in_directive) + not clear on this, but to me this makes most sense. + Similarly, don't interpret _Pragma inside expand_args, we might + need to stringize it later on. */ + if (pfile->state.in_directive || pfile->state.ignore__Pragma) return 0; return _cpp_do__Pragma (pfile, loc); @@ -832,8 +834,7 @@ cpp_quote_string (uchar *dest, const uchar *src, unsigned int len) /* Convert a token sequence FIRST to FIRST+COUNT-1 to a single string token according to the rules of the ISO C #-operator. */ static const cpp_token * -stringify_arg (cpp_reader *pfile, const cpp_token **first, unsigned int count, - bool va_opt) +stringify_arg (cpp_reader *pfile, const cpp_token **first, unsigned int count) { unsigned char *dest; unsigned int i, escape_it, backslash_count = 0; @@ -850,24 +851,6 @@ stringify_arg (cpp_reader *pfile, const cpp_token **first, unsigned int count, { const cpp_token *token = first[i]; - if (va_opt && (token->flags & PASTE_LEFT)) - { - location_t virt_loc = pfile->invocation_location; - const cpp_token *rhs; - do - { - if (i == count) - abort (); - rhs = first[++i]; - if (!paste_tokens (pfile, virt_loc, &token, rhs)) - { - --i; - break; - } - } - while (rhs->flags & PASTE_LEFT); - } - if (token->type == CPP_PADDING) { if (source == NULL @@ -1001,6 +984,7 @@ paste_tokens (cpp_reader *pfile, location_t location, return false; } + lhs->flags |= (*plhs)->flags & (PREV_WHITE | PREV_FALLTHROUGH); *plhs = lhs; _cpp_pop_buffer (pfile); return true; @@ -1259,7 +1243,7 @@ collect_args (cpp_reader *pfile, const cpp_hashnode *node, else if (token->type == CPP_EOF || (token->type == CPP_HASH && token->flags & BOL)) break; - else if (token->type == CPP_PRAGMA) + else if (token->type == CPP_PRAGMA && !(token->flags & PRAGMA_OP)) { cpp_token *newtok = _cpp_temp_token (pfile); @@ -1389,8 +1373,11 @@ funlike_invocation_p (cpp_reader *pfile, cpp_hashnode *node, token = cpp_get_token (pfile); if (token->type != CPP_PADDING) break; + gcc_assert ((token->flags & PREV_WHITE) == 0); if (padding == NULL - || (!(padding->flags & PREV_WHITE) && token->val.source == NULL)) + || padding->val.source == NULL + || (!(padding->val.source->flags & PREV_WHITE) + && token->val.source == NULL)) padding = token; } @@ -1943,8 +1930,7 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, cpp_macro *macro, if (src->flags & STRINGIFY_ARG) { if (!arg->stringified) - arg->stringified = stringify_arg (pfile, arg->first, arg->count, - false); + arg->stringified = stringify_arg (pfile, arg->first, arg->count); } else if ((src->flags & PASTE_LEFT) || (src != macro->exp.tokens && (src[-1].flags & PASTE_LEFT))) @@ -2064,11 +2050,46 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, cpp_macro *macro, { unsigned int count = start ? paste_flag - start : tokens_buff_count (buff); - const cpp_token *t - = stringify_arg (pfile, - start ? start + 1 - : (const cpp_token **) (buff->base), - count, true); + const cpp_token **first + = start ? start + 1 + : (const cpp_token **) (buff->base); + unsigned int i, j; + + /* Paste any tokens that need to be pasted before calling + stringify_arg, because stringify_arg uses pfile->u_buff + which paste_tokens can use as well. */ + for (i = 0, j = 0; i < count; i++, j++) + { + const cpp_token *token = first[i]; + + if (token->flags & PASTE_LEFT) + { + location_t virt_loc = pfile->invocation_location; + const cpp_token *rhs; + do + { + if (i == count) + abort (); + rhs = first[++i]; + if (!paste_tokens (pfile, virt_loc, &token, rhs)) + { + --i; + break; + } + } + while (rhs->flags & PASTE_LEFT); + } + + first[j] = token; + } + if (j != i) + { + while (i-- != j) + tokens_buff_remove_last_token (buff); + count = j; + } + + const cpp_token *t = stringify_arg (pfile, first, count); while (count--) tokens_buff_remove_last_token (buff); if (src->flags & PASTE_LEFT) @@ -2076,8 +2097,14 @@ replace_args (cpp_reader *pfile, cpp_hashnode *node, cpp_macro *macro, tokens_buff_add_token (buff, virt_locs, t, t->src_loc, t->src_loc, NULL, 0); + continue; } - else if (src->flags & PASTE_LEFT) + if (start && paste_flag == start && (*start)->flags & PASTE_LEFT) + /* If __VA_OPT__ expands to nothing (either because __VA_ARGS__ + is empty or because it is __VA_OPT__() ), drop PASTE_LEFT + flag from previous token. */ + copy_paste_flag (pfile, start, &pfile->avoid_paste); + if (src->flags & PASTE_LEFT) { /* Don't avoid paste after all. */ while (paste_flag && paste_flag != start @@ -2648,6 +2675,7 @@ expand_arg (cpp_reader *pfile, macro_arg *arg) size_t capacity; bool saved_warn_trad; bool track_macro_exp_p = CPP_OPTION (pfile, track_macro_expansion); + bool saved_ignore__Pragma; if (arg->count == 0 || arg->expanded != NULL) @@ -2670,6 +2698,9 @@ expand_arg (cpp_reader *pfile, macro_arg *arg) push_ptoken_context (pfile, NULL, NULL, arg->first, arg->count + 1); + saved_ignore__Pragma = pfile->state.ignore__Pragma; + pfile->state.ignore__Pragma = 1; + for (;;) { const cpp_token *token; @@ -2692,6 +2723,7 @@ expand_arg (cpp_reader *pfile, macro_arg *arg) _cpp_pop_context (pfile); CPP_WTRADITIONAL (pfile) = saved_warn_trad; + pfile->state.ignore__Pragma = saved_ignore__Pragma; } /* Returns the macro associated to the current context if we are in diff --git a/libcpp/makeucnid.c b/libcpp/makeucnid.cc index b3a0aab..57c3beb 100644 --- a/libcpp/makeucnid.c +++ b/libcpp/makeucnid.cc @@ -1,5 +1,5 @@ /* Make ucnid.h from various sources. - Copyright (C) 2005-2021 Free Software Foundation, Inc. + Copyright (C) 2005-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -450,7 +450,7 @@ write_copyright (void) { static const char copyright[] = "\ /* Unicode characters and various properties.\n\ - Copyright (C) 2003-2021 Free Software Foundation, Inc.\n\ + Copyright (C) 2003-2022 Free Software Foundation, Inc.\n\ \n\ This program is free software; you can redistribute it and/or modify it\n\ under the terms of the GNU General Public License as published by the\n\ diff --git a/libcpp/mkdeps.c b/libcpp/mkdeps.cc index 1867e00..30e87d8 100644 --- a/libcpp/mkdeps.c +++ b/libcpp/mkdeps.cc @@ -1,5 +1,5 @@ /* Dependency generator for Makefile fragments. - Copyright (C) 2000-2021 Free Software Foundation, Inc. + Copyright (C) 2000-2022 Free Software Foundation, Inc. Contributed by Zack Weinberg, Mar 2000 This program is free software; you can redistribute it and/or modify it diff --git a/libcpp/pch.c b/libcpp/pch.cc index bb80964..9aa589a 100644 --- a/libcpp/pch.c +++ b/libcpp/pch.cc @@ -1,5 +1,5 @@ /* Part of CPP library. (Precompiled header reading/writing.) - Copyright (C) 2000-2021 Free Software Foundation, Inc. + Copyright (C) 2000-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the diff --git a/libcpp/po/ChangeLog b/libcpp/po/ChangeLog index 843a84b..54ca87f 100644 --- a/libcpp/po/ChangeLog +++ b/libcpp/po/ChangeLog @@ -1,3 +1,7 @@ +2021-12-20 Joseph Myers <joseph@codesourcery.com> + + * es.po: Update. + 2021-08-16 Joseph Myers <joseph@codesourcery.com> * de.po: Update. diff --git a/libcpp/po/es.po b/libcpp/po/es.po index fde067a..bd50130 100644 --- a/libcpp/po/es.po +++ b/libcpp/po/es.po @@ -6,10 +6,10 @@ # Antonio Ceballos Roa <aceballos@gmail.com>, 2021. msgid "" msgstr "" -"Project-Id-Version: cpplib 10.1-b20200209\n" +"Project-Id-Version: cpplib 11.1-b20210207\n" "Report-Msgid-Bugs-To: https://gcc.gnu.org/bugs/\n" "POT-Creation-Date: 2021-02-05 21:38+0000\n" -"PO-Revision-Date: 2021-01-07 11:33+0100\n" +"PO-Revision-Date: 2021-12-18 09:17+0100\n" "Last-Translator: Antonio Ceballos Roa <aceballos@gmail.com>\n" "Language-Team: Spanish <es@tp.org.es>\n" "Language: es\n" @@ -491,36 +491,28 @@ msgid "use of C99 long long integer constant" msgstr "uso de una constante entera long long C99" #: expr.c:822 -#, fuzzy -#| msgid "use of C++11 long long integer constant" msgid "use of C++23 %<size_t%> integer constant" -msgstr "uso de una constante entera long long C++11" +msgstr "uso de constante entera %<size_t%> de C++23" #: expr.c:823 -#, fuzzy -#| msgid "use of C++11 long long integer constant" msgid "use of C++23 %<make_signed_t<size_t>%> integer constant" -msgstr "uso de una constante entera long long C++11" +msgstr "uso de constante entera %<make_signed_t<size_t>%> de C++23" #: expr.c:834 msgid "imaginary constants are a GCC extension" -msgstr "las constantes imaginarias son una extensión GCC" +msgstr "las constantes imaginarias son una extensión de GCC" #: expr.c:841 msgid "binary constants are a C++14 feature or GCC extension" -msgstr "las constantes binarias son una característica C++14 o extensión GCC" +msgstr "las constantes binarias son una característica C++14 o una extensión de GCC" #: expr.c:843 -#, fuzzy -#| msgid "binary constants are a C++14 feature or GCC extension" msgid "binary constants are a C2X feature or GCC extension" -msgstr "las constantes binarias son una característica C++14 o extensión GCC" +msgstr "las constantes binarias son una característica de C2X o una extensión de GCC" #: expr.c:848 -#, fuzzy -#| msgid "decimal float constants are a C2X feature" msgid "binary constants are a C2X feature" -msgstr "las constantes de coma flotante decimal son una característica de C2X" +msgstr "las constantes binarias son una característica de C2X" #: expr.c:944 msgid "integer constant is too large for its type" @@ -713,16 +705,12 @@ msgid "`%.*s' is not in NFC" msgstr "`%.*s' no está en NFC" #: lex.c:1375 -#, fuzzy -#| msgid "__VA_OPT__ is not available until C++2a" msgid "__VA_OPT__ is not available until C++20" -msgstr "__VA_OPT__ no está disponible bajo C++2a" +msgstr "__VA_OPT__ no está disponible hasta C++20" #: lex.c:1382 -#, fuzzy -#| msgid "__VA_OPT__ can only appear in the expansion of a C++2a variadic macro" msgid "__VA_OPT__ can only appear in the expansion of a C++20 variadic macro" -msgstr "__VA_OPT__ solamente puede aparecer en la expansión de una macro variadic C++2a" +msgstr "__VA_OPT__ solamente puede aparecer en la expansión de una macro variadic de C++20" #: lex.c:1413 lex.c:1506 #, c-format @@ -778,12 +766,12 @@ msgstr "C++11 requiere un espacio entre cadena literal y macro" #: lex.c:2711 msgid "module control-line cannot be in included file" -msgstr "" +msgstr "la línea de control del módulo no puede estar en un fichero incluido" #: lex.c:2725 #, c-format msgid "module control-line \"%s\" cannot be an object-like macro" -msgstr "" +msgstr "la línea de control del módulo «%s» no puede ser una macro de tipo objeto" #: lex.c:3099 lex.c:4472 traditional.c:174 msgid "unterminated comment" @@ -811,16 +799,13 @@ msgid "unspellable token %s" msgstr "elemento %s impronunciable" #: lex.c:4627 -#, fuzzy, c-format -#| msgid "raw string delimiter longer than 16 characters" +#, c-format msgid "raw string delimiter longer than %d characters" -msgstr "el delimitador de cadena cruda es más largo que 16 caracteres" +msgstr "el delimitador de cadena cruda es mayor de %d caracteres" #: lex.c:4697 -#, fuzzy -#| msgid "unterminated #%s" msgid "unterminated literal" -msgstr "#%s sin terminar" +msgstr "literal sin terminar" #: macro.c:94 msgid "'##' cannot appear at either end of __VA_OPT__" diff --git a/libcpp/symtab.c b/libcpp/symtab.cc index 9a2fae0..a059214 100644 --- a/libcpp/symtab.c +++ b/libcpp/symtab.cc @@ -1,5 +1,5 @@ /* Hash tables. - Copyright (C) 2000-2021 Free Software Foundation, Inc. + Copyright (C) 2000-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the diff --git a/libcpp/system.h b/libcpp/system.h index ee5fbe2..e80cf02 100644 --- a/libcpp/system.h +++ b/libcpp/system.h @@ -1,6 +1,6 @@ /* Get common system includes and various definitions and declarations based on autoconf macros. - Copyright (C) 1998-2021 Free Software Foundation, Inc. + Copyright (C) 1998-2022 Free Software Foundation, Inc. This file is part of GCC. @@ -422,6 +422,18 @@ extern void fancy_abort (const char *, int, const char *) ATTRIBUTE_NORETURN; #define gcc_checking_assert(EXPR) ((void)(0 && (EXPR))) #endif +#ifdef __has_cpp_attribute +# if __has_cpp_attribute(likely) +# define ATTR_LIKELY [[likely]] +# elif __has_cpp_attribute(__likely__) +# define ATTR_LIKELY [[__likely__]] +# else +# define ATTR_LIKELY +# endif +#else +# define ATTR_LIKELY +#endif + /* Poison identifiers we do not want to use. */ #if (GCC_VERSION >= 3000) #undef calloc diff --git a/libcpp/traditional.c b/libcpp/traditional.cc index 92d0dd1..d02ea48 100644 --- a/libcpp/traditional.c +++ b/libcpp/traditional.cc @@ -1,5 +1,5 @@ /* CPP Library - traditional lexical analysis and macro expansion. - Copyright (C) 2002-2021 Free Software Foundation, Inc. + Copyright (C) 2002-2022 Free Software Foundation, Inc. Contributed by Neil Booth, May 2002 This program is free software; you can redistribute it and/or modify it @@ -80,7 +80,7 @@ enum ls {ls_none = 0, /* Normal state. */ ls_answer /* In answer to predicate. */ }; -/* Lexing TODO: Maybe handle space in escaped newlines. Stop lex.c +/* Lexing TODO: Maybe handle space in escaped newlines. Stop lex.cc from recognizing comments and directives during its lexing pass. */ static const uchar *skip_whitespace (cpp_reader *, const uchar *, int); diff --git a/libcpp/ucnid.h b/libcpp/ucnid.h index e7b0ac2..b15c123 100644 --- a/libcpp/ucnid.h +++ b/libcpp/ucnid.h @@ -1,5 +1,5 @@ /* Unicode characters and various properties. - Copyright (C) 2003-2021 Free Software Foundation, Inc. + Copyright (C) 2003-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the diff --git a/libcpp/ucnid.tab b/libcpp/ucnid.tab index 6542d11..0940f9e 100644 --- a/libcpp/ucnid.tab +++ b/libcpp/ucnid.tab @@ -1,5 +1,5 @@ ; Table of UCNs which are valid in identifiers. -; Copyright (C) 2003-2021 Free Software Foundation, Inc. +; Copyright (C) 2003-2022 Free Software Foundation, Inc. ; ; This program is free software; you can redistribute it and/or modify it ; under the terms of the GNU General Public License as published by the |