From 1174314811af52779497462f26d21ea0038d1a85 Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Tue, 13 Apr 2021 11:57:55 +0200 Subject: Fix thinko in libcpp preparation patch for modules The problem is that the new IS_MACRO_LOC macro: inline bool IS_MACRO_LOC (location_t loc) { return !IS_ORDINARY_LOC (loc) && !IS_ADHOC_LOC (loc); } is not fully correct since the position of the macro lines is not fixed: /* Returns the lowest location [of a token resulting from macro expansion] encoded in this line table. */ inline location_t LINEMAPS_MACRO_LOWEST_LOCATION (const line_maps *set) { return LINEMAPS_MACRO_USED (set) ? MAP_START_LOCATION (LINEMAPS_LAST_MACRO_MAP (set)) : MAX_LOCATION_T + 1; } In Ada, LINEMAPS_MACRO_USED is false so LINEMAPS_MACRO_LOWEST_LOCATION is MAX_LOCATION_T + 1, but IS_MACRO_LOC nevertheless returns true for anything in the range [LINE_MAP_MAX_LOCATION; MAX_LOCATION_T], thus yielding an ICE in linemap_macro_map_lookup for very large files. libcpp/ * include/line-map.h (IS_MACRO_LOC): Delete. * line-map.c (linemap_location_from_macro_expansion_p): Test LINEMAPS_MACRO_LOWEST_LOCATION of the linemap. gcc/cp/ * module.cc (ordinary_loc_of): Test LINEMAPS_MACRO_LOWEST_LOCATION of the linemap. (module_state::write_location): Likewise. --- libcpp/include/line-map.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'libcpp/include') diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h index 40919d0..7d96417 100644 --- a/libcpp/include/line-map.h +++ b/libcpp/include/line-map.h @@ -563,7 +563,7 @@ struct GTY((tag ("2"))) line_map_macro : public line_map { #define linemap_assert_fails(EXPR) (! (EXPR)) #endif -/* Get whether location LOC is an ad-hoc, ordinary or macro location. */ +/* Get whether location LOC is an ordinary location. 
*/ inline bool IS_ORDINARY_LOC (location_t loc) @@ -571,18 +571,14 @@ IS_ORDINARY_LOC (location_t loc) return loc < LINE_MAP_MAX_LOCATION; } +/* Get whether location LOC is an ad-hoc location. */ + inline bool IS_ADHOC_LOC (location_t loc) { return loc > MAX_LOCATION_T; } -inline bool -IS_MACRO_LOC (location_t loc) -{ - return !IS_ORDINARY_LOC (loc) && !IS_ADHOC_LOC (loc); -} - /* Categorize line map kinds. */ inline bool -- cgit v1.1 From 71d38ec80008afdbb9a059253407d80598b765c0 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Tue, 11 May 2021 23:54:01 +0000 Subject: preprocessor: Support C2X #elifdef, #elifndef C2X adds #elifdef and #elifndef preprocessor directives; these have also been proposed for C++. Implement these directives in libcpp accordingly. In this implementation, #elifdef and #elifndef are treated as non-directives for any language version other than c2x and gnu2x (if the feature is accepted for C++, it can trivially be enabled for relevant C++ versions). In strict conformance modes for prior language versions, this is required, as illustrated by the c11-elifdef-1.c test added. Bootstrapped with no regressions for x86_64-pc-linux-gnu. libcpp/ * include/cpplib.h (struct cpp_options): Add elifdef. * init.c (struct lang_flags): Add elifdef. (lang_defaults): Update to include elifdef initializers. (cpp_set_lang): Set elifdef for pfile based on language. * directives.c (STDC2X, ELIFDEF): New macros. (EXTENSION): Increase value to 3. (DIRECTIVE_TABLE): Add #elifdef and #elifndef. (_cpp_handle_directive): Do not treat ELIFDEF directives as directives for language versions without the #elifdef feature. (do_elif): Handle #elifdef and #elifndef. (do_elifdef, do_elifndef): New functions. gcc/testsuite/ * gcc.dg/cpp/c11-elifdef-1.c, gcc.dg/cpp/c2x-elifdef-1.c, gcc.dg/cpp/c2x-elifdef-2.c: New tests. 
--- libcpp/include/cpplib.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'libcpp/include') diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 41d75d9..2cdaf19 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -497,6 +497,9 @@ struct cpp_options /* Nonzero for the '::' token. */ unsigned char scope; + /* Nonzero for the '#elifdef' and '#elifndef' directives. */ + unsigned char elifdef; + /* Nonzero means tokenize C++20 module directives. */ unsigned char module_directives; -- cgit v1.1 From 1a9b3f04c11eb467a8dc504a37dad57a371a0d4c Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Thu, 20 May 2021 08:10:50 +0000 Subject: c: Add support for __FILE_NAME__ macro (PR c/42579) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The toolchain provided by ST for stm32 has had support for __FILENAME__ for a while, but clang/llvm has recently implemented support for __FILE_NAME__, so it seems better to use the same macro name in GCC. It happens that the ST patch is similar to the one proposed in PR c/42579. Given these input files: :::::::::::::: mydir/myinc.h :::::::::::::: char* mystringh_file = __FILE__; char* mystringh_filename = __FILE_NAME__; char* mystringh_base_file = __BASE_FILE__; :::::::::::::: mydir/mysrc.c :::::::::::::: char* mystring_file = __FILE__; char* mystring_filename = __FILE_NAME__; char* mystring_base_file = __BASE_FILE__; we produce: $ gcc mydir/mysrc.c -I . -E char* mystringh_file = "./mydir/myinc.h"; char* mystringh_filename = "myinc.h"; char* mystringh_base_file = "mydir/mysrc.c"; char* mystring_file = "mydir/mysrc.c"; char* mystring_filename = "mysrc.c"; char* mystring_base_file = "mydir/mysrc.c"; 2021-05-20 Christophe Lyon Torbjörn Svensson PR c/42579 libcpp/ * include/cpplib.h (cpp_builtin_type): Add BT_FILE_NAME entry. * init.c (builtin_array): Likewise. * macro.c (_cpp_builtin_macro_text): Add support for BT_FILE_NAME. 
gcc/ * doc/cpp.texi (Common Predefined Macros): Document __FILE_NAME__. gcc/testsuite/ * c-c++-common/spellcheck-reserved.c: Add tests for __FILE_NAME__. * c-c++-common/cpp/file-name-1.c: New test. --- libcpp/include/cpplib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'libcpp/include') diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 2cdaf19..7e84063 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -886,6 +886,7 @@ enum cpp_builtin_type BT_SPECLINE = 0, /* `__LINE__' */ BT_DATE, /* `__DATE__' */ BT_FILE, /* `__FILE__' */ + BT_FILE_NAME, /* `__FILE_NAME__' */ BT_BASE_FILE, /* `__BASE_FILE__' */ BT_INCLUDE_LEVEL, /* `__INCLUDE_LEVEL__' */ BT_TIME, /* `__TIME__' */ -- cgit v1.1 From 3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2 Mon Sep 17 00:00:00 2001 From: Lewis Hyatt Date: Tue, 24 Aug 2021 19:30:44 -0400 Subject: diagnostics: Support for -finput-charset [PR93067] Adds the logic to handle -finput-charset in layout_get_source_line(), so that source lines are converted from their input encodings prior to being output by diagnostics machinery. Also adds the ability to strip a UTF-8 BOM similarly. gcc/c-family/ChangeLog: PR other/93067 * c-opts.c (c_common_input_charset_cb): New function. (c_common_post_options): Call new function diagnostic_initialize_input_context(). gcc/d/ChangeLog: PR other/93067 * d-lang.cc (d_input_charset_callback): New function. (d_init): Call new function diagnostic_initialize_input_context(). gcc/fortran/ChangeLog: PR other/93067 * cpp.c (gfc_cpp_post_options): Call new function diagnostic_initialize_input_context(). gcc/ChangeLog: PR other/93067 * coretypes.h (typedef diagnostic_input_charset_callback): Declare. * diagnostic.c (diagnostic_initialize_input_context): New function. * diagnostic.h (diagnostic_initialize_input_context): Declare. * input.c (default_charset_callback): New function. (file_cache::initialize_input_context): New function. 
(file_cache_slot::create): Added ability to convert the input according to the input context. (file_cache::file_cache): Initialize the new input context. (class file_cache_slot): Added new m_alloc_offset member. (file_cache_slot::file_cache_slot): Initialize the new member. (file_cache_slot::~file_cache_slot): Handle potentially offset buffer. (file_cache_slot::maybe_grow): Likewise. (file_cache_slot::needs_read_p): Handle NULL fp, which is now possible. (file_cache_slot::get_next_line): Likewise. * input.h (class file_cache): Added input context member. libcpp/ChangeLog: PR other/93067 * charset.c (init_iconv_desc): Adapt to permit PFILE argument to be NULL. (_cpp_convert_input): Likewise. Also move UTF-8 BOM logic to... (cpp_check_utf8_bom): ...here. New function. (cpp_input_conversion_is_trivial): New function. * files.c (read_file_guts): Allow PFILE argument to be NULL. Add INPUT_CHARSET argument as an alternate source of this information. (read_file): Pass the new argument to read_file_guts. (cpp_get_converted_source): New function. * include/cpplib.h (struct cpp_converted_source): Declare. (cpp_get_converted_source): Declare. (cpp_input_conversion_is_trivial): Declare. (cpp_check_utf8_bom): Declare. gcc/testsuite/ChangeLog: PR other/93067 * gcc.dg/diagnostic-input-charset-1.c: New test. * gcc.dg/diagnostic-input-utf8-bom.c: New test. --- libcpp/include/cpplib.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'libcpp/include') diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 7e84063..af14291 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1379,6 +1379,20 @@ extern struct _cpp_file *cpp_get_file (cpp_buffer *); extern cpp_buffer *cpp_get_prev (cpp_buffer *); extern void cpp_clear_file_cache (cpp_reader *); +/* cpp_get_converted_source returns the contents of the given file, as it exists + after cpplib has read it and converted it from the input charset to the + source charset. 
Return struct will be zero-filled if the data could not be + read for any reason. The data starts at the DATA pointer, but the TO_FREE + pointer is what should be passed to free(), as there may be an offset. */ +struct cpp_converted_source +{ + char *to_free; + char *data; + size_t len; +}; +cpp_converted_source cpp_get_converted_source (const char *fname, + const char *input_charset); + /* In pch.c */ struct save_macro_data; extern int cpp_save_state (cpp_reader *, FILE *); @@ -1449,6 +1463,7 @@ class cpp_display_width_computation { /* Convenience functions that are simple use cases for class cpp_display_width_computation. Tab characters will be expanded to spaces as determined by TABSTOP. */ + int cpp_byte_column_to_display_column (const char *data, int data_length, int column, int tabstop); inline int cpp_display_width (const char *data, int data_length, @@ -1461,4 +1476,7 @@ int cpp_display_column_to_byte_column (const char *data, int data_length, int display_col, int tabstop); int cpp_wcwidth (cppchar_t c); +bool cpp_input_conversion_is_trivial (const char *input_charset); +int cpp_check_utf8_bom (const char *data, size_t data_length); + #endif /* ! LIBCPP_CPPLIB_H */ -- cgit v1.1 From e4d2305adf4e9d11e396c1c5e5ae6214340cbcc2 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 31 Aug 2021 11:15:21 -0600 Subject: Disable gcc_rich_location copying and assignment. gcc/cp/ChangeLog: * parser.c (cp_parser_selection_statement): Use direct initialization instead of copy. gcc/ChangeLog: * gcc-rich-location.h (gcc_rich_location): Make ctor explicit. libcpp/ChangeLog: * include/line-map.h (class rich_location): Disable copying and assignment. --- libcpp/include/line-map.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'libcpp/include') diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h index 7d96417..464494b 100644 --- a/libcpp/include/line-map.h +++ b/libcpp/include/line-map.h @@ -1670,6 +1670,12 @@ class rich_location /* Destructor. 
*/ ~rich_location (); + /* The class manages the memory pointed to by the elements of + the M_FIXIT_HINTS vector and is not meant to be copied or + assigned. */ + rich_location (const rich_location &) = delete; + void operator= (const rich_location &) = delete; + /* Accessors. */ location_t get_loc () const { return get_loc (0); } location_t get_loc (unsigned int idx) const; -- cgit v1.1 From c4d6dcacfca1b804504515496e6d9de176d7f51e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 1 Sep 2021 22:33:06 +0200 Subject: libcpp: Implement C++23 P1949R7 - C++ Identifier Syntax using Unicode Standard Annex 31 The following patch implements the P1949R7 - C++ Identifier Syntax using Unicode Standard Annex 31 paper. We already allow UTF-8 characters in the source, so that part is already implemented, so IMHO all we need to do is pedwarn instead of just warn for the (default) -Wnormalize=nfc (or for -Wnormalize={id,nfkc}) if the character is not in NFC and to use the unicode XID_Start and XID_Continue derived code properties to find out what characters are allowed (the standard actually adds U+005F to XID_Start, but we are handling the ASCII compatible characters differently already and they aren't allowed in UCNs in identifiers). Instead of hardcoding the large tables in ucnid.tab, this patch makes makeucnid.c read them from the Unicode tables (13.0.0 version at this point). For non-pedantic mode, we accept as 2nd+ char in identifiers a union of valid characters in all supported modes, but for the 1st char it was actually pedantically requiring that it is not any of the characters that may not appear in the currently chosen standard as the first character. This patch changes it such that also what is allowed at the start of an identifier is a union of characters valid at the start of an identifier in any of the pedantic modes. 2021-09-01 Jakub Jelinek PR c++/100977 libcpp/ * include/cpplib.h (struct cpp_options): Add cxx23_identifiers.
* charset.c (CXX23, NXX23): New enumerators. (CID, NFC, NKC, CTX): Renumber. (ucn_valid_in_identifier): Implement P1949R7 - use CXX23 and NXX23 flags for cxx23_identifiers. For start character in non-pedantic mode, allow characters that are allowed as start characters in any of the supported language modes, rather than disallowing characters allowed only as non-start characters in current mode but for characters from other language modes allowing them even if they are never allowed at start. * init.c (struct lang_flags): Add cxx23_identifiers. (lang_defaults): Add cxx23_identifiers column. (cpp_set_lang): Initialize CPP_OPTION (pfile, cxx23_identifiers). * lex.c (warn_about_normalization): If cxx23_identifiers, use cpp_pedwarning_with_line instead of cpp_warning_with_line for "is not in NFC" diagnostics. * makeucnid.c: Adjust usage comment. (CXX23, NXX23): New enumerators. (all_languages): Add CXX23. (not_NFC, not_NFKC, maybe_not_NFC): Renumber. (read_derivedcore): New function. (write_table): Print also CXX23 and NXX23 columns. (main): Require 5 arguments instead of 4, call read_derivedcore. * ucnid.h: Regenerated using Unicode 13.0.0 files. gcc/testsuite/ * g++.dg/cpp23/normalize1.C: New test. * g++.dg/cpp23/normalize2.C: New test. * g++.dg/cpp23/normalize3.C: New test. * g++.dg/cpp23/normalize4.C: New test. * g++.dg/cpp23/normalize5.C: New test. * g++.dg/cpp23/normalize6.C: New test. * g++.dg/cpp23/normalize7.C: New test. * g++.dg/cpp23/ucnid-1-utf8.C: New test. * g++.dg/cpp23/ucnid-2-utf8.C: New test. * gcc.dg/cpp/ucnid-4.c: Don't expect "not valid at the start of an identifier" errors. * gcc.dg/cpp/ucnid-4-utf8.c: Likewise. * gcc.dg/cpp/ucnid-5-utf8.c: New test. 
--- libcpp/include/cpplib.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'libcpp/include') diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index af14291..6e2fcb6 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -482,6 +482,10 @@ struct cpp_options in C11 and C++11. */ unsigned char c11_identifiers; + /* Nonzero means extended identifiers allow the characters specified + in C++23. */ + unsigned char cxx23_identifiers; + /* Nonzero for C++ 2014 Standard binary constants. */ unsigned char binary_constants; -- cgit v1.1