diff options
author | Lewis Hyatt <lhyatt@gmail.com> | 2021-08-24 19:30:44 -0400 |
---|---|---|
committer | Lewis Hyatt <lhyatt@gmail.com> | 2021-08-25 11:15:28 -0400 |
commit | 3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2 (patch) | |
tree | 688dfb2b2708df32fd2e6b548061eea352e79cea /gcc/input.c | |
parent | 43a5d46feabd93ba78983919234f05f5fc9a0982 (diff) | |
download | gcc-3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2.zip gcc-3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2.tar.gz gcc-3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2.tar.bz2 |
diagnostics: Support for -finput-charset [PR93067]
Adds the logic to handle -finput-charset in layout_get_source_line(), so that
source lines are converted from their input encoding before being output by the
diagnostics machinery. Also adds the ability to strip a leading UTF-8 BOM in the
same way.
gcc/c-family/ChangeLog:
PR other/93067
* c-opts.c (c_common_input_charset_cb): New function.
(c_common_post_options): Call new function
diagnostic_initialize_input_context().
gcc/d/ChangeLog:
PR other/93067
* d-lang.cc (d_input_charset_callback): New function.
(d_init): Call new function
diagnostic_initialize_input_context().
gcc/fortran/ChangeLog:
PR other/93067
* cpp.c (gfc_cpp_post_options): Call new function
diagnostic_initialize_input_context().
gcc/ChangeLog:
PR other/93067
* coretypes.h (typedef diagnostic_input_charset_callback): Declare.
* diagnostic.c (diagnostic_initialize_input_context): New function.
* diagnostic.h (diagnostic_initialize_input_context): Declare.
* input.c (default_charset_callback): New function.
(file_cache::initialize_input_context): New function.
(file_cache_slot::create): Added ability to convert the input
according to the input context.
(file_cache::file_cache): Initialize the new input context.
(class file_cache_slot): Added new m_alloc_offset member.
(file_cache_slot::file_cache_slot): Initialize the new member.
(file_cache_slot::~file_cache_slot): Handle potentially offset buffer.
(file_cache_slot::maybe_grow): Likewise.
(file_cache_slot::needs_read_p): Handle NULL fp, which is now possible.
(file_cache_slot::get_next_line): Likewise.
* input.h (class file_cache): Added input context member.
libcpp/ChangeLog:
PR other/93067
* charset.c (init_iconv_desc): Adapt to permit PFILE argument to
be NULL.
(_cpp_convert_input): Likewise. Also move UTF-8 BOM logic to...
(cpp_check_utf8_bom): ...here. New function.
(cpp_input_conversion_is_trivial): New function.
* files.c (read_file_guts): Allow PFILE argument to be NULL. Add
INPUT_CHARSET argument as an alternate source of this information.
(read_file): Pass the new argument to read_file_guts.
(cpp_get_converted_source): New function.
* include/cpplib.h (struct cpp_converted_source): Declare.
(cpp_get_converted_source): Declare.
(cpp_input_conversion_is_trivial): Declare.
(cpp_check_utf8_bom): Declare.
gcc/testsuite/ChangeLog:
PR other/93067
* gcc.dg/diagnostic-input-charset-1.c: New test.
* gcc.dg/diagnostic-input-utf8-bom.c: New test.
Diffstat (limited to 'gcc/input.c')
-rw-r--r-- | gcc/input.c | 100 |
1 files changed, 88 insertions, 12 deletions
diff --git a/gcc/input.c b/gcc/input.c index de20d98..4b80986 100644 --- a/gcc/input.c +++ b/gcc/input.c @@ -22,7 +22,6 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "intl.h" #include "diagnostic.h" -#include "diagnostic-core.h" #include "selftest.h" #include "cpplib.h" @@ -30,6 +29,20 @@ along with GCC; see the file COPYING3. If not see #define HAVE_ICONV 0 #endif +/* Input charset configuration. */ +static const char *default_charset_callback (const char *) +{ + return nullptr; +} + +void +file_cache::initialize_input_context (diagnostic_input_charset_callback ccb, + bool should_skip_bom) +{ + in_context.ccb = (ccb ? ccb : default_charset_callback); + in_context.should_skip_bom = should_skip_bom; +} + /* This is a cache used by get_next_line to store the content of a file to be searched for file lines. */ class file_cache_slot @@ -51,7 +64,8 @@ public: void inc_use_count () { m_use_count++; } - void create (const char *file_path, FILE *fp, unsigned highest_use_count); + bool create (const file_cache::input_context &in_context, + const char *file_path, FILE *fp, unsigned highest_use_count); void evict (); private: @@ -110,6 +124,10 @@ public: far. */ char *m_data; + /* The allocated buffer to be freed may start a little earlier than DATA, + e.g. if a UTF8 BOM was skipped at the beginning. */ + int m_alloc_offset; + /* The size of the DATA array above.*/ size_t m_size; @@ -147,6 +165,17 @@ public: doesn't explode. We thus scale total_lines down to line_record_size. */ vec<line_info, va_heap> m_line_record; + + void offset_buffer (int offset) + { + gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0 + : (size_t) offset <= m_size); + gcc_assert (m_data); + m_alloc_offset += offset; + m_data += offset; + m_size -= offset; + } + }; /* Current position in real source file. 
*/ @@ -419,21 +448,25 @@ file_cache::add_file (const char *file_path) unsigned highest_use_count = 0; file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count); - r->create (file_path, fp, highest_use_count); + if (!r->create (in_context, file_path, fp, highest_use_count)) + return NULL; return r; } /* Populate this slot for use on FILE_PATH and FP, dropping any existing cached content within it. */ -void -file_cache_slot::create (const char *file_path, FILE *fp, +bool +file_cache_slot::create (const file_cache::input_context &in_context, + const char *file_path, FILE *fp, unsigned highest_use_count) { m_file_path = file_path; if (m_fp) fclose (m_fp); m_fp = fp; + if (m_alloc_offset) + offset_buffer (-m_alloc_offset); m_nb_read = 0; m_line_start_idx = 0; m_line_num = 0; @@ -443,6 +476,36 @@ file_cache_slot::create (const char *file_path, FILE *fp, m_use_count = ++highest_use_count; m_total_lines = total_lines_num (file_path); m_missing_trailing_newline = true; + + + /* Check the input configuration to determine if we need to do any + transformations, such as charset conversion or BOM skipping. */ + if (const char *input_charset = in_context.ccb (file_path)) + { + /* Need a full-blown conversion of the input charset. */ + fclose (m_fp); + m_fp = NULL; + const cpp_converted_source cs + = cpp_get_converted_source (file_path, input_charset); + if (!cs.data) + return false; + if (m_data) + XDELETEVEC (m_data); + m_data = cs.data; + m_nb_read = m_size = cs.len; + m_alloc_offset = cs.data - cs.to_free; + } + else if (in_context.should_skip_bom) + { + if (read_data ()) + { + const int offset = cpp_check_utf8_bom (m_data, m_nb_read); + offset_buffer (offset); + m_nb_read -= offset; + } + } + + return true; } /* file_cache's ctor. */ @@ -450,6 +513,7 @@ file_cache_slot::create (const char *file_path, FILE *fp, file_cache::file_cache () : m_file_slots (new file_cache_slot[num_file_slots]) { + initialize_input_context (nullptr, false); } /* file_cache's dtor. 
*/ @@ -478,8 +542,8 @@ file_cache::lookup_or_add_file (const char *file_path) file_cache_slot::file_cache_slot () : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0), - m_size (0), m_nb_read (0), m_line_start_idx (0), m_line_num (0), - m_total_lines (0), m_missing_trailing_newline (true) + m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0), + m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true) { m_line_record.create (0); } @@ -495,6 +559,7 @@ file_cache_slot::~file_cache_slot () } if (m_data) { + offset_buffer (-m_alloc_offset); XDELETEVEC (m_data); m_data = 0; } @@ -509,7 +574,7 @@ file_cache_slot::~file_cache_slot () bool file_cache_slot::needs_read_p () const { - return (m_nb_read == 0 + return m_fp && (m_nb_read == 0 || m_nb_read == m_size || (m_line_start_idx >= m_nb_read - 1)); } @@ -531,9 +596,20 @@ file_cache_slot::maybe_grow () if (!needs_grow_p ()) return; - size_t size = m_size == 0 ? buffer_size : m_size * 2; - m_data = XRESIZEVEC (char, m_data, size); - m_size = size; + if (!m_data) + { + gcc_assert (m_size == 0 && m_alloc_offset == 0); + m_size = buffer_size; + m_data = XNEWVEC (char, m_size); + } + else + { + const int offset = m_alloc_offset; + offset_buffer (-offset); + m_size *= 2; + m_data = XRESIZEVEC (char, m_data, m_size); + offset_buffer (offset); + } } /* Read more data into the cache. Extends the cache if need be. @@ -632,7 +708,7 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len) m_missing_trailing_newline = false; } - if (ferror (m_fp)) + if (m_fp && ferror (m_fp)) return false; /* At this point, we've found the end of the of line. It either |