about | summary | refs | log | tree | commit | diff
path: root/gcc/input.c
diff options
context:
space:
mode:
author Lewis Hyatt <lhyatt@gmail.com> 2021-08-24 19:30:44 -0400
committer Lewis Hyatt <lhyatt@gmail.com> 2021-08-25 11:15:28 -0400
commit 3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2 (patch)
tree 688dfb2b2708df32fd2e6b548061eea352e79cea /gcc/input.c
parent 43a5d46feabd93ba78983919234f05f5fc9a0982 (diff)
download gcc-3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2.zip
gcc-3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2.tar.gz
gcc-3ac6b5cff1eca4e1748c671960ef7b4ca5e47fd2.tar.bz2
diagnostics: Support for -finput-charset [PR93067]
Adds the logic to handle -finput-charset in layout_get_source_line(), so that
source lines are converted from their input encodings prior to being output by
diagnostics machinery. Also adds the ability to strip a UTF-8 BOM similarly.

gcc/c-family/ChangeLog:

    PR other/93067
    * c-opts.c (c_common_input_charset_cb): New function.
    (c_common_post_options): Call new function
    diagnostic_initialize_input_context().

gcc/d/ChangeLog:

    PR other/93067
    * d-lang.cc (d_input_charset_callback): New function.
    (d_init): Call new function
    diagnostic_initialize_input_context().

gcc/fortran/ChangeLog:

    PR other/93067
    * cpp.c (gfc_cpp_post_options): Call new function
    diagnostic_initialize_input_context().

gcc/ChangeLog:

    PR other/93067
    * coretypes.h (typedef diagnostic_input_charset_callback): Declare.
    * diagnostic.c (diagnostic_initialize_input_context): New function.
    * diagnostic.h (diagnostic_initialize_input_context): Declare.
    * input.c (default_charset_callback): New function.
    (file_cache::initialize_input_context): New function.
    (file_cache_slot::create): Added ability to convert the input
    according to the input context.
    (file_cache::file_cache): Initialize the new input context.
    (class file_cache_slot): Added new m_alloc_offset member.
    (file_cache_slot::file_cache_slot): Initialize the new member.
    (file_cache_slot::~file_cache_slot): Handle potentially offset buffer.
    (file_cache_slot::maybe_grow): Likewise.
    (file_cache_slot::needs_read_p): Handle NULL fp, which is now possible.
    (file_cache_slot::get_next_line): Likewise.
    * input.h (class file_cache): Added input context member.

libcpp/ChangeLog:

    PR other/93067
    * charset.c (init_iconv_desc): Adapt to permit PFILE argument to
    be NULL.
    (_cpp_convert_input): Likewise. Also move UTF-8 BOM logic to...
    (cpp_check_utf8_bom): ...here. New function.
    (cpp_input_conversion_is_trivial): New function.
    * files.c (read_file_guts): Allow PFILE argument to be NULL. Add
    INPUT_CHARSET argument as an alternate source of this information.
    (read_file): Pass the new argument to read_file_guts.
    (cpp_get_converted_source): New function.
    * include/cpplib.h (struct cpp_converted_source): Declare.
    (cpp_get_converted_source): Declare.
    (cpp_input_conversion_is_trivial): Declare.
    (cpp_check_utf8_bom): Declare.

gcc/testsuite/ChangeLog:

    PR other/93067
    * gcc.dg/diagnostic-input-charset-1.c: New test.
    * gcc.dg/diagnostic-input-utf8-bom.c: New test.
Diffstat (limited to 'gcc/input.c')
-rw-r--r-- gcc/input.c 100
1 files changed, 88 insertions, 12 deletions
diff --git a/gcc/input.c b/gcc/input.c
index de20d98..4b80986 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -22,7 +22,6 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "intl.h"
#include "diagnostic.h"
-#include "diagnostic-core.h"
#include "selftest.h"
#include "cpplib.h"
@@ -30,6 +29,20 @@ along with GCC; see the file COPYING3. If not see
#define HAVE_ICONV 0
#endif
+/* Input charset configuration. */
+static const char *default_charset_callback (const char *)
+{
+ return nullptr;
+}
+
+void
+file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
+ bool should_skip_bom)
+{
+ in_context.ccb = (ccb ? ccb : default_charset_callback);
+ in_context.should_skip_bom = should_skip_bom;
+}
+
/* This is a cache used by get_next_line to store the content of a
file to be searched for file lines. */
class file_cache_slot
@@ -51,7 +64,8 @@ public:
void inc_use_count () { m_use_count++; }
- void create (const char *file_path, FILE *fp, unsigned highest_use_count);
+ bool create (const file_cache::input_context &in_context,
+ const char *file_path, FILE *fp, unsigned highest_use_count);
void evict ();
private:
@@ -110,6 +124,10 @@ public:
far. */
char *m_data;
+ /* The allocated buffer to be freed may start a little earlier than DATA,
+ e.g. if a UTF8 BOM was skipped at the beginning. */
+ int m_alloc_offset;
+
/* The size of the DATA array above.*/
size_t m_size;
@@ -147,6 +165,17 @@ public:
doesn't explode. We thus scale total_lines down to
line_record_size. */
vec<line_info, va_heap> m_line_record;
+
+ void offset_buffer (int offset)
+ {
+ gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
+ : (size_t) offset <= m_size);
+ gcc_assert (m_data);
+ m_alloc_offset += offset;
+ m_data += offset;
+ m_size -= offset;
+ }
+
};
/* Current position in real source file. */
@@ -419,21 +448,25 @@ file_cache::add_file (const char *file_path)
unsigned highest_use_count = 0;
file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
- r->create (file_path, fp, highest_use_count);
+ if (!r->create (in_context, file_path, fp, highest_use_count))
+ return NULL;
return r;
}
/* Populate this slot for use on FILE_PATH and FP, dropping any
existing cached content within it. */
-void
-file_cache_slot::create (const char *file_path, FILE *fp,
+bool
+file_cache_slot::create (const file_cache::input_context &in_context,
+ const char *file_path, FILE *fp,
unsigned highest_use_count)
{
m_file_path = file_path;
if (m_fp)
fclose (m_fp);
m_fp = fp;
+ if (m_alloc_offset)
+ offset_buffer (-m_alloc_offset);
m_nb_read = 0;
m_line_start_idx = 0;
m_line_num = 0;
@@ -443,6 +476,36 @@ file_cache_slot::create (const char *file_path, FILE *fp,
m_use_count = ++highest_use_count;
m_total_lines = total_lines_num (file_path);
m_missing_trailing_newline = true;
+
+
+ /* Check the input configuration to determine if we need to do any
+ transformations, such as charset conversion or BOM skipping. */
+ if (const char *input_charset = in_context.ccb (file_path))
+ {
+ /* Need a full-blown conversion of the input charset. */
+ fclose (m_fp);
+ m_fp = NULL;
+ const cpp_converted_source cs
+ = cpp_get_converted_source (file_path, input_charset);
+ if (!cs.data)
+ return false;
+ if (m_data)
+ XDELETEVEC (m_data);
+ m_data = cs.data;
+ m_nb_read = m_size = cs.len;
+ m_alloc_offset = cs.data - cs.to_free;
+ }
+ else if (in_context.should_skip_bom)
+ {
+ if (read_data ())
+ {
+ const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
+ offset_buffer (offset);
+ m_nb_read -= offset;
+ }
+ }
+
+ return true;
}
/* file_cache's ctor. */
@@ -450,6 +513,7 @@ file_cache_slot::create (const char *file_path, FILE *fp,
file_cache::file_cache ()
: m_file_slots (new file_cache_slot[num_file_slots])
{
+ initialize_input_context (nullptr, false);
}
/* file_cache's dtor. */
@@ -478,8 +542,8 @@ file_cache::lookup_or_add_file (const char *file_path)
file_cache_slot::file_cache_slot ()
: m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
- m_size (0), m_nb_read (0), m_line_start_idx (0), m_line_num (0),
- m_total_lines (0), m_missing_trailing_newline (true)
+ m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
+ m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
{
m_line_record.create (0);
}
@@ -495,6 +559,7 @@ file_cache_slot::~file_cache_slot ()
}
if (m_data)
{
+ offset_buffer (-m_alloc_offset);
XDELETEVEC (m_data);
m_data = 0;
}
@@ -509,7 +574,7 @@ file_cache_slot::~file_cache_slot ()
bool
file_cache_slot::needs_read_p () const
{
- return (m_nb_read == 0
+ return m_fp && (m_nb_read == 0
|| m_nb_read == m_size
|| (m_line_start_idx >= m_nb_read - 1));
}
@@ -531,9 +596,20 @@ file_cache_slot::maybe_grow ()
if (!needs_grow_p ())
return;
- size_t size = m_size == 0 ? buffer_size : m_size * 2;
- m_data = XRESIZEVEC (char, m_data, size);
- m_size = size;
+ if (!m_data)
+ {
+ gcc_assert (m_size == 0 && m_alloc_offset == 0);
+ m_size = buffer_size;
+ m_data = XNEWVEC (char, m_size);
+ }
+ else
+ {
+ const int offset = m_alloc_offset;
+ offset_buffer (-offset);
+ m_size *= 2;
+ m_data = XRESIZEVEC (char, m_data, m_size);
+ offset_buffer (offset);
+ }
}
/* Read more data into the cache. Extends the cache if need be.
@@ -632,7 +708,7 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len)
m_missing_trailing_newline = false;
}
- if (ferror (m_fp))
+ if (m_fp && ferror (m_fp))
return false;
/* At this point, we've found the end of the line. It either