From 3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2 Mon Sep 17 00:00:00 2001 From: Lewis Hyatt Date: Tue, 24 Aug 2021 19:30:44 -0400 Subject: diagnostics: Support for -finput-charset [PR93067] Adds the logic to handle -finput-charset in layout_get_source_line(), so that source lines are converted from their input encodings prior to being output by diagnostics machinery. Also adds the ability to strip a UTF-8 BOM similarly. gcc/c-family/ChangeLog: PR other/93067 * c-opts.c (c_common_input_charset_cb): New function. (c_common_post_options): Call new function diagnostic_initialize_input_context(). gcc/d/ChangeLog: PR other/93067 * d-lang.cc (d_input_charset_callback): New function. (d_init): Call new function diagnostic_initialize_input_context(). gcc/fortran/ChangeLog: PR other/93067 * cpp.c (gfc_cpp_post_options): Call new function diagnostic_initialize_input_context(). gcc/ChangeLog: PR other/93067 * coretypes.h (typedef diagnostic_input_charset_callback): Declare. * diagnostic.c (diagnostic_initialize_input_context): New function. * diagnostic.h (diagnostic_initialize_input_context): Declare. * input.c (default_charset_callback): New function. (file_cache::initialize_input_context): New function. (file_cache_slot::create): Added ability to convert the input according to the input context. (file_cache::file_cache): Initialize the new input context. (class file_cache_slot): Added new m_alloc_offset member. (file_cache_slot::file_cache_slot): Initialize the new member. (file_cache_slot::~file_cache_slot): Handle potentially offset buffer. (file_cache_slot::maybe_grow): Likewise. (file_cache_slot::needs_read_p): Handle NULL fp, which is now possible. (file_cache_slot::get_next_line): Likewise. * input.h (class file_cache): Added input context member. libcpp/ChangeLog: PR other/93067 * charset.c (init_iconv_desc): Adapt to permit PFILE argument to be NULL. (_cpp_convert_input): Likewise. Also move UTF-8 BOM logic to... (cpp_check_utf8_bom): ...here. New function. (cpp_input_conversion_is_trivial): New function. * files.c (read_file_guts): Allow PFILE argument to be NULL. Add INPUT_CHARSET argument as an alternate source of this information. (read_file): Pass the new argument to read_file_guts. (cpp_get_converted_source): New function. * include/cpplib.h (struct cpp_converted_source): Declare. (cpp_get_converted_source): Declare. (cpp_input_conversion_is_trivial): Declare. (cpp_check_utf8_bom): Declare. gcc/testsuite/ChangeLog: PR other/93067 * gcc.dg/diagnostic-input-charset-1.c: New test. * gcc.dg/diagnostic-input-utf8-bom.c: New test. --- libcpp/include/cpplib.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'libcpp/include/cpplib.h') diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 7e84063..af14291 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1379,6 +1379,20 @@ extern struct _cpp_file *cpp_get_file (cpp_buffer *); extern cpp_buffer *cpp_get_prev (cpp_buffer *); extern void cpp_clear_file_cache (cpp_reader *); +/* cpp_get_converted_source returns the contents of the given file, as it exists + after cpplib has read it and converted it from the input charset to the + source charset. Return struct will be zero-filled if the data could not be + read for any reason. The data starts at the DATA pointer, but the TO_FREE + pointer is what should be passed to free(), as there may be an offset. */ +struct cpp_converted_source +{ + char *to_free; + char *data; + size_t len; +}; +cpp_converted_source cpp_get_converted_source (const char *fname, + const char *input_charset); + /* In pch.c */ struct save_macro_data; extern int cpp_save_state (cpp_reader *, FILE *); @@ -1449,6 +1463,7 @@ class cpp_display_width_computation { /* Convenience functions that are simple use cases for class cpp_display_width_computation. Tab characters will be expanded to spaces as determined by TABSTOP. */ + int cpp_byte_column_to_display_column (const char *data, int data_length, int column, int tabstop); inline int cpp_display_width (const char *data, int data_length, @@ -1461,4 +1476,7 @@ int cpp_display_column_to_byte_column (const char *data, int data_length, int display_col, int tabstop); int cpp_wcwidth (cppchar_t c); +bool cpp_input_conversion_is_trivial (const char *input_charset); +int cpp_check_utf8_bom (const char *data, size_t data_length); + #endif /* ! LIBCPP_CPPLIB_H */ -- cgit v1.1