aboutsummaryrefslogtreecommitdiff
path: root/gcc/diagnostics/file-cache.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/diagnostics/file-cache.cc')
-rw-r--r--gcc/diagnostics/file-cache.cc1083
1 files changed, 1083 insertions, 0 deletions
diff --git a/gcc/diagnostics/file-cache.cc b/gcc/diagnostics/file-cache.cc
new file mode 100644
index 0000000..febeb03
--- /dev/null
+++ b/gcc/diagnostics/file-cache.cc
@@ -0,0 +1,1083 @@
+/* Caching input files for use by diagnostics.
+ Copyright (C) 2004-2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "cpplib.h"
+#include "diagnostics/file-cache.h"
+#include "selftest.h"
+
+#ifndef HAVE_ICONV
+#define HAVE_ICONV 0
+#endif
+
+namespace diagnostics {
+
+/* Input charset configuration. */
+static const char *default_charset_callback (const char *)
+{
+ return nullptr;
+}
+
+void
+file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
+ bool should_skip_bom)
+{
+ m_input_context.ccb = (ccb ? ccb : default_charset_callback);
+ m_input_context.should_skip_bom = should_skip_bom;
+}
+
+/* This is a cache used by get_next_line to store the content of a
+ file to be searched for file lines. */
+class file_cache_slot
+{
+public:
+ file_cache_slot ();
+ ~file_cache_slot ();
+
+ void dump (FILE *out, int indent) const;
+ void DEBUG_FUNCTION dump () const { dump (stderr, 0); }
+
+ bool read_line_num (size_t line_num,
+ char ** line, ssize_t *line_len);
+
+ /* Accessors. */
+ const char *get_file_path () const { return m_file_path; }
+ unsigned get_use_count () const { return m_use_count; }
+ bool missing_trailing_newline_p () const
+ {
+ return m_missing_trailing_newline;
+ }
+ char_span get_full_file_content ();
+
+ void inc_use_count () { m_use_count++; }
+
+ bool create (const file_cache::input_context &in_context,
+ const char *file_path, FILE *fp, unsigned highest_use_count);
+ void evict ();
+ void set_content (const char *buf, size_t sz);
+
+ static size_t tune (size_t line_record_size_)
+ {
+ size_t ret = line_record_size;
+ line_record_size = line_record_size_;
+ return ret;
+ }
+
+ private:
+ /* These are information used to store a line boundary. */
+ class line_info
+ {
+ public:
+ /* The line number. It starts from 1. */
+ size_t line_num;
+
+ /* The position (byte count) of the beginning of the line,
+ relative to the file data pointer. This starts at zero. */
+ size_t start_pos;
+
+ /* The position (byte count) of the last byte of the line. This
+ normally points to the '\n' character, or to one byte after the
+ last byte of the file, if the file doesn't contain a '\n'
+ character. */
+ size_t end_pos;
+
+ line_info (size_t l, size_t s, size_t e)
+ : line_num (l), start_pos (s), end_pos (e)
+ {}
+
+ line_info ()
+ :line_num (0), start_pos (0), end_pos (0)
+ {}
+
+ static bool less_than(const line_info &a, const line_info &b)
+ {
+ return a.line_num < b.line_num;
+ }
+ };
+
+ bool needs_read_p () const;
+ bool needs_grow_p () const;
+ void maybe_grow ();
+ bool read_data ();
+ bool maybe_read_data ();
+ bool get_next_line (char **line, ssize_t *line_len);
+ bool read_next_line (char ** line, ssize_t *line_len);
+ bool goto_next_line ();
+
+ static const size_t buffer_size = 4 * 1024;
+ static size_t line_record_size;
+ static size_t recent_cached_lines_shift;
+
+ /* The number of time this file has been accessed. This is used
+ to designate which file cache to evict from the cache
+ array. */
+ unsigned m_use_count;
+
+ /* The file_path is the key for identifying a particular file in
+ the cache. This copy is owned by the slot. */
+ char *m_file_path;
+
+ FILE *m_fp;
+
+ /* True when an read error happened. */
+ bool m_error;
+
+ /* This points to the content of the file that we've read so
+ far. */
+ char *m_data;
+
+ /* The allocated buffer to be freed may start a little earlier than DATA,
+ e.g. if a UTF8 BOM was skipped at the beginning. */
+ int m_alloc_offset;
+
+ /* The size of the DATA array above.*/
+ size_t m_size;
+
+ /* The number of bytes read from the underlying file so far. This
+ must be less (or equal) than SIZE above. */
+ size_t m_nb_read;
+
+ /* The index of the beginning of the current line. */
+ size_t m_line_start_idx;
+
+ /* The number of the previous line read. This starts at 1. Zero
+ means we've read no line so far. */
+ size_t m_line_num;
+
+ /* Could this file be missing a trailing newline on its final line?
+ Initially true (to cope with empty files), set to true/false
+ as each line is read. */
+ bool m_missing_trailing_newline;
+
+ /* This is a record of the beginning and end of the lines we've seen
+ while reading the file. This is useful to avoid walking the data
+ from the beginning when we are asked to read a line that is
+ before LINE_START_IDX above. When the lines exceed line_record_size
+ this is scaled down dynamically, with the line_info becoming anchors. */
+ vec<line_info, va_heap> m_line_record;
+
+ /* A cache of the recently seen lines. This is maintained as a ring
+ buffer. */
+ vec<line_info, va_heap> m_line_recent;
+
+ /* First and last valid entry in m_line_recent. */
+ size_t m_line_recent_last, m_line_recent_first;
+
+ void offset_buffer (int offset)
+ {
+ gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
+ : (size_t) offset <= m_size);
+ gcc_assert (m_data);
+ m_alloc_offset += offset;
+ m_data += offset;
+ m_size -= offset;
+ }
+
+};
+
+size_t file_cache_slot::line_record_size = 0;
+size_t file_cache_slot::recent_cached_lines_shift = 8;
+
+/* Tune file_cache. */
+void
+file_cache::tune (size_t num_file_slots, size_t lines)
+{
+ if (file_cache_slot::tune (lines) != lines
+ || m_num_file_slots != num_file_slots)
+ {
+ delete[] m_file_slots;
+ m_file_slots = new file_cache_slot[num_file_slots];
+ }
+ m_num_file_slots = num_file_slots;
+}
+
+static const char *
+find_end_of_line (const char *s, size_t len);
+
+/* Lookup the cache used for the content of a given file accessed by
+ caret diagnostic. Return the found cached file, or NULL if no
+ cached file was found. */
+
+file_cache_slot *
+file_cache::lookup_file (const char *file_path)
+{
+ gcc_assert (file_path);
+
+ /* This will contain the found cached file. */
+ file_cache_slot *r = NULL;
+ for (unsigned i = 0; i < m_num_file_slots; ++i)
+ {
+ file_cache_slot *c = &m_file_slots[i];
+ if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
+ {
+ c->inc_use_count ();
+ r = c;
+ }
+ }
+
+ if (r)
+ r->inc_use_count ();
+
+ return r;
+}
+
+/* Purge any mention of FILENAME from the cache of files used for
+ printing source code. For use in selftests when working
+ with tempfiles. */
+
+void
+file_cache::forcibly_evict_file (const char *file_path)
+{
+ gcc_assert (file_path);
+
+ file_cache_slot *r = lookup_file (file_path);
+ if (!r)
+ /* Not found. */
+ return;
+
+ r->evict ();
+}
+
+/* Determine if FILE_PATH missing a trailing newline on its final line.
+ Only valid to call once all of the file has been loaded, by
+ requesting a line number beyond the end of the file. */
+
+bool
+file_cache::missing_trailing_newline_p (const char *file_path)
+{
+ gcc_assert (file_path);
+
+ file_cache_slot *r = lookup_or_add_file (file_path);
+ return r->missing_trailing_newline_p ();
+}
+
+void
+file_cache::add_buffered_content (const char *file_path,
+ const char *buffer,
+ size_t sz)
+{
+ gcc_assert (file_path);
+
+ file_cache_slot *r = lookup_file (file_path);
+ if (!r)
+ {
+ unsigned highest_use_count = 0;
+ r = evicted_cache_tab_entry (&highest_use_count);
+ if (!r->create (m_input_context, file_path, nullptr, highest_use_count))
+ return;
+ }
+
+ r->set_content (buffer, sz);
+}
+
+void
+file_cache_slot::evict ()
+{
+ free (m_file_path);
+ m_file_path = NULL;
+ if (m_fp)
+ fclose (m_fp);
+ m_error = false;
+ m_fp = NULL;
+ m_nb_read = 0;
+ m_line_start_idx = 0;
+ m_line_num = 0;
+ m_line_record.truncate (0);
+ m_line_recent_first = 0;
+ m_line_recent_last = 0;
+ m_use_count = 0;
+ m_missing_trailing_newline = true;
+}
+
+/* Return the file cache that has been less used, recently, or the
+ first empty one. If HIGHEST_USE_COUNT is non-null,
+ *HIGHEST_USE_COUNT is set to the highest use count of the entries
+ in the cache table. */
+
+file_cache_slot*
+file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
+{
+ file_cache_slot *to_evict = &m_file_slots[0];
+ unsigned huc = to_evict->get_use_count ();
+ for (unsigned i = 1; i < m_num_file_slots; ++i)
+ {
+ file_cache_slot *c = &m_file_slots[i];
+ bool c_is_empty = (c->get_file_path () == NULL);
+
+ if (c->get_use_count () < to_evict->get_use_count ()
+ || (to_evict->get_file_path () && c_is_empty))
+ /* We evict C because it's either an entry with a lower use
+ count or one that is empty. */
+ to_evict = c;
+
+ if (huc < c->get_use_count ())
+ huc = c->get_use_count ();
+
+ if (c_is_empty)
+ /* We've reached the end of the cache; subsequent elements are
+ all empty. */
+ break;
+ }
+
+ if (highest_use_count)
+ *highest_use_count = huc;
+
+ return to_evict;
+}
+
+/* Create the cache used for the content of a given file to be
+ accessed by caret diagnostic. This cache is added to an array of
+ cache and can be retrieved by lookup_file_in_cache_tab. This
+ function returns the created cache. Note that only the last
+ m_num_file_slots files are cached.
+
+ This can return nullptr if the FILE_PATH can't be opened for
+ reading, or if the content can't be converted to the input_charset. */
+
+file_cache_slot*
+file_cache::add_file (const char *file_path)
+{
+
+ FILE *fp = fopen (file_path, "r");
+ if (fp == NULL)
+ return NULL;
+
+ unsigned highest_use_count = 0;
+ file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
+ if (!r->create (m_input_context, file_path, fp, highest_use_count))
+ return NULL;
+ return r;
+}
+
+/* Get a borrowed char_span to the full content of this file
+ as decoded according to the input charset, encoded as UTF-8. */
+
+char_span
+file_cache_slot::get_full_file_content ()
+{
+ char *line;
+ ssize_t line_len;
+ while (get_next_line (&line, &line_len))
+ {
+ }
+ return char_span (m_data, m_nb_read);
+}
+
+/* Populate this slot for use on FILE_PATH and FP, dropping any
+ existing cached content within it. */
+
+bool
+file_cache_slot::create (const file_cache::input_context &in_context,
+ const char *file_path, FILE *fp,
+ unsigned highest_use_count)
+{
+ m_file_path = file_path ? xstrdup (file_path) : nullptr;
+ if (m_fp)
+ fclose (m_fp);
+ m_error = false;
+ m_fp = fp;
+ if (m_alloc_offset)
+ offset_buffer (-m_alloc_offset);
+ m_nb_read = 0;
+ m_line_start_idx = 0;
+ m_line_num = 0;
+ m_line_recent_first = 0;
+ m_line_recent_last = 0;
+ m_line_record.truncate (0);
+ /* Ensure that this cache entry doesn't get evicted next time
+ add_file_to_cache_tab is called. */
+ m_use_count = ++highest_use_count;
+ m_missing_trailing_newline = true;
+
+
+ /* Check the input configuration to determine if we need to do any
+ transformations, such as charset conversion or BOM skipping. */
+ if (const char *input_charset = in_context.ccb (file_path))
+ {
+ /* Need a full-blown conversion of the input charset. */
+ fclose (m_fp);
+ m_fp = NULL;
+ const cpp_converted_source cs
+ = cpp_get_converted_source (file_path, input_charset);
+ if (!cs.data)
+ return false;
+ if (m_data)
+ XDELETEVEC (m_data);
+ m_data = cs.data;
+ m_nb_read = m_size = cs.len;
+ m_alloc_offset = cs.data - cs.to_free;
+ }
+ else if (in_context.should_skip_bom)
+ {
+ if (read_data ())
+ {
+ const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
+ offset_buffer (offset);
+ m_nb_read -= offset;
+ }
+ }
+
+ return true;
+}
+
+void
+file_cache_slot::set_content (const char *buf, size_t sz)
+{
+ m_data = (char *)xmalloc (sz);
+ memcpy (m_data, buf, sz);
+ m_nb_read = m_size = sz;
+ m_alloc_offset = 0;
+
+ if (m_fp)
+ {
+ fclose (m_fp);
+ m_fp = nullptr;
+ }
+}
+
+/* file_cache's ctor. */
+
+file_cache::file_cache ()
+: m_num_file_slots (16), m_file_slots (new file_cache_slot[m_num_file_slots])
+{
+ initialize_input_context (nullptr, false);
+}
+
+/* file_cache's dtor. */
+
+file_cache::~file_cache ()
+{
+ delete[] m_file_slots;
+}
+
+void
+file_cache::dump (FILE *out, int indent) const
+{
+ for (size_t i = 0; i < m_num_file_slots; ++i)
+ {
+ fprintf (out, "%*sslot[%i]:\n", indent, "", (int)i);
+ m_file_slots[i].dump (out, indent + 2);
+ }
+}
+
+void
+file_cache::dump () const
+{
+ dump (stderr, 0);
+}
+
+/* Lookup the cache used for the content of a given file accessed by
+ caret diagnostic. If no cached file was found, create a new cache
+ for this file, add it to the array of cached file and return
+ it.
+
+ This can return nullptr on a cache miss if FILE_PATH can't be opened for
+ reading, or if the content can't be converted to the input_charset. */
+
+file_cache_slot*
+file_cache::lookup_or_add_file (const char *file_path)
+{
+ file_cache_slot *r = lookup_file (file_path);
+ if (r == NULL)
+ r = add_file (file_path);
+ return r;
+}
+
+/* Default constructor for a cache of file used by caret
+ diagnostic. */
+
+file_cache_slot::file_cache_slot ()
+: m_use_count (0), m_file_path (NULL), m_fp (NULL), m_error (false), m_data (0),
+ m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
+ m_line_num (0), m_missing_trailing_newline (true),
+ m_line_recent_last (0), m_line_recent_first (0)
+{
+ m_line_record.create (0);
+ m_line_recent.create (1U << recent_cached_lines_shift);
+ for (int i = 0; i < 1 << recent_cached_lines_shift; i++)
+ m_line_recent.quick_push (file_cache_slot::line_info (0, 0, 0));
+}
+
+/* Destructor for a cache of file used by caret diagnostic. */
+
+file_cache_slot::~file_cache_slot ()
+{
+ free (m_file_path);
+ if (m_fp)
+ {
+ fclose (m_fp);
+ m_fp = NULL;
+ }
+ if (m_data)
+ {
+ offset_buffer (-m_alloc_offset);
+ XDELETEVEC (m_data);
+ m_data = 0;
+ }
+ m_line_record.release ();
+ m_line_recent.release ();
+}
+
+void
+file_cache_slot::dump (FILE *out, int indent) const
+{
+ if (!m_file_path)
+ {
+ fprintf (out, "%*s(unused)\n", indent, "");
+ return;
+ }
+ fprintf (out, "%*sfile_path: %s\n", indent, "", m_file_path);
+ fprintf (out, "%*sfp: %p\n", indent, "", (void *)m_fp);
+ fprintf (out, "%*sneeds_read_p: %i\n", indent, "", (int)needs_read_p ());
+ fprintf (out, "%*sneeds_grow_p: %i\n", indent, "", (int)needs_grow_p ());
+ fprintf (out, "%*suse_count: %i\n", indent, "", m_use_count);
+ fprintf (out, "%*ssize: %zi\n", indent, "", m_size);
+ fprintf (out, "%*snb_read: %zi\n", indent, "", m_nb_read);
+ fprintf (out, "%*sstart_line_idx: %zi\n", indent, "", m_line_start_idx);
+ fprintf (out, "%*sline_num: %zi\n", indent, "", m_line_num);
+ fprintf (out, "%*smissing_trailing_newline: %i\n",
+ indent, "", (int)m_missing_trailing_newline);
+ fprintf (out, "%*sline records (%i):\n",
+ indent, "", m_line_record.length ());
+ int idx = 0;
+ for (auto &line : m_line_record)
+ fprintf (out, "%*s[%i]: line %zi: byte offsets: %zi-%zi\n",
+ indent + 2, "",
+ idx++, line.line_num, line.start_pos, line.end_pos);
+}
+
+/* Returns TRUE iff the cache would need to be filled with data coming
+ from the file. That is, either the cache is empty or full or the
+ current line is empty. Note that if the cache is full, it would
+ need to be extended and filled again. */
+
+bool
+file_cache_slot::needs_read_p () const
+{
+ return m_fp && (m_nb_read == 0
+ || m_nb_read == m_size
+ || (m_line_start_idx >= m_nb_read - 1));
+}
+
+/* Return TRUE iff the cache is full and thus needs to be
+ extended. */
+
+bool
+file_cache_slot::needs_grow_p () const
+{
+ return m_nb_read == m_size;
+}
+
+/* Grow the cache if it needs to be extended. */
+
+void
+file_cache_slot::maybe_grow ()
+{
+ if (!needs_grow_p ())
+ return;
+
+ if (!m_data)
+ {
+ gcc_assert (m_size == 0 && m_alloc_offset == 0);
+ m_size = buffer_size;
+ m_data = XNEWVEC (char, m_size);
+ }
+ else
+ {
+ const int offset = m_alloc_offset;
+ offset_buffer (-offset);
+ m_size *= 2;
+ m_data = XRESIZEVEC (char, m_data, m_size);
+ offset_buffer (offset);
+ }
+}
+
+/* Read more data into the cache. Extends the cache if need be.
+ Returns TRUE iff new data could be read. */
+
+bool
+file_cache_slot::read_data ()
+{
+ if (feof (m_fp) || ferror (m_fp))
+ return false;
+
+ maybe_grow ();
+
+ char * from = m_data + m_nb_read;
+ size_t to_read = m_size - m_nb_read;
+ size_t nb_read = fread (from, 1, to_read, m_fp);
+
+ if (ferror (m_fp))
+ {
+ m_error = true;
+ return false;
+ }
+
+ m_nb_read += nb_read;
+ return !!nb_read;
+}
+
+/* Read new data iff the cache needs to be filled with more data
+ coming from the file FP. Return TRUE iff the cache was filled with
+ mode data. */
+
+bool
+file_cache_slot::maybe_read_data ()
+{
+ if (!needs_read_p ())
+ return false;
+ return read_data ();
+}
+
+/* Helper function for file_cache_slot::get_next_line (), to find the end of
+ the next line. Returns with the memchr convention, i.e. nullptr if a line
+ terminator was not found. We need to determine line endings in the same
+ manner that libcpp does: any of \n, \r\n, or \r is a line ending. */
+
+static const char *
+find_end_of_line (const char *s, size_t len)
+{
+ for (const auto end = s + len; s != end; ++s)
+ {
+ if (*s == '\n')
+ return s;
+ if (*s == '\r')
+ {
+ const auto next = s + 1;
+ if (next == end)
+ {
+ /* Don't find the line ending if \r is the very last character
+ in the buffer; we do not know if it's the end of the file or
+ just the end of what has been read so far, and we wouldn't
+ want to break in the middle of what's actually a \r\n
+ sequence. Instead, we will handle the case of a file ending
+ in a \r later. */
+ break;
+ }
+ return (*next == '\n' ? next : s);
+ }
+ }
+ return nullptr;
+}
+
+/* Read a new line from file FP, using C as a cache for the data
+ coming from the file. Upon successful completion, *LINE is set to
+ the beginning of the line found. *LINE points directly in the
+ line cache and is only valid until the next call of get_next_line.
+ *LINE_LEN is set to the length of the line. Note that the line
+ does not contain any terminal delimiter. This function returns
+ true if some data was read or process from the cache, false
+ otherwise. Note that subsequent calls to get_next_line might
+ make the content of *LINE invalid. */
+
+bool
+file_cache_slot::get_next_line (char **line, ssize_t *line_len)
+{
+ /* Fill the cache with data to process. */
+ maybe_read_data ();
+
+ size_t remaining_size = m_nb_read - m_line_start_idx;
+ if (remaining_size == 0)
+ /* There is no more data to process. */
+ return false;
+
+ const char *line_start = m_data + m_line_start_idx;
+
+ const char *next_line_start = NULL;
+ size_t len = 0;
+ const char *line_end = find_end_of_line (line_start, remaining_size);
+ if (line_end == NULL)
+ {
+ /* We haven't found an end-of-line delimiter in the cache.
+ Fill the cache with more data from the file and look again. */
+ while (maybe_read_data ())
+ {
+ line_start = m_data + m_line_start_idx;
+ remaining_size = m_nb_read - m_line_start_idx;
+ line_end = find_end_of_line (line_start, remaining_size);
+ if (line_end != NULL)
+ {
+ next_line_start = line_end + 1;
+ break;
+ }
+ }
+ if (line_end == NULL)
+ {
+ /* We've loaded all the file into the cache and still no
+ terminator. Let's say the line ends up at one byte past the
+ end of the file. This is to stay consistent with the case
+ of when the line ends up with a terminator and line_end points to
+ that. That consistency is useful below in the len calculation.
+
+ If the file ends in a \r, we didn't identify it as a line
+ terminator above, so do that now instead. */
+ line_end = m_data + m_nb_read;
+ if (m_nb_read && line_end[-1] == '\r')
+ {
+ --line_end;
+ m_missing_trailing_newline = false;
+ }
+ else
+ m_missing_trailing_newline = true;
+ }
+ else
+ m_missing_trailing_newline = false;
+ }
+ else
+ {
+ next_line_start = line_end + 1;
+ m_missing_trailing_newline = false;
+ }
+
+ if (m_error)
+ return false;
+
+ /* At this point, we've found the end of the of line. It either points to
+ the line terminator or to one byte after the last byte of the file. */
+ gcc_assert (line_end != NULL);
+
+ len = line_end - line_start;
+
+ if (m_line_start_idx < m_nb_read)
+ *line = const_cast<char *> (line_start);
+
+ ++m_line_num;
+
+ /* Now update our line record so that re-reading lines from the
+ before m_line_start_idx is faster. */
+ size_t rlen = m_line_record.length ();
+ /* Only update when beyond the previously cached region. */
+ if (rlen == 0 || m_line_record[rlen - 1].line_num < m_line_num)
+ {
+ size_t spacing
+ = (rlen >= 2
+ ? (m_line_record[rlen - 1].line_num
+ - m_line_record[rlen - 2].line_num) : 1);
+ size_t delta
+ = rlen >= 1 ? m_line_num - m_line_record[rlen - 1].line_num : 1;
+
+ size_t max_size = line_record_size;
+ /* One anchor per hundred input lines. */
+ if (max_size == 0)
+ max_size = m_line_num / 100;
+
+ /* If we're too far beyond drop half of the lines to rebalance. */
+ if (rlen == max_size && delta >= spacing * 2)
+ {
+ size_t j = 0;
+ for (size_t i = 1; i < rlen; i += 2)
+ m_line_record[j++] = m_line_record[i];
+ m_line_record.truncate (j);
+ rlen = j;
+ spacing *= 2;
+ }
+
+ if (rlen < max_size && delta >= spacing)
+ {
+ file_cache_slot::line_info li (m_line_num, m_line_start_idx,
+ line_end - m_data);
+ m_line_record.safe_push (li);
+ }
+ }
+
+ /* Cache recent tail lines separately for fast access. This assumes
+ most accesses do not skip backwards. */
+ if (m_line_recent_last == m_line_recent_first
+ || m_line_recent[m_line_recent_last].line_num == m_line_num - 1)
+ {
+ size_t mask = ((size_t) 1 << recent_cached_lines_shift) - 1;
+ m_line_recent_last = (m_line_recent_last + 1) & mask;
+ if (m_line_recent_last == m_line_recent_first)
+ m_line_recent_first = (m_line_recent_first + 1) & mask;
+ m_line_recent[m_line_recent_last]
+ = file_cache_slot::line_info (m_line_num, m_line_start_idx,
+ line_end - m_data);
+ }
+
+ /* Update m_line_start_idx so that it points to the next line to be
+ read. */
+ if (next_line_start)
+ m_line_start_idx = next_line_start - m_data;
+ else
+ /* We didn't find any terminal '\n'. Let's consider that the end
+ of line is the end of the data in the cache. The next
+ invocation of get_next_line will either read more data from the
+ underlying file or return false early because we've reached the
+ end of the file. */
+ m_line_start_idx = m_nb_read;
+
+ *line_len = len;
+
+ return true;
+}
+
+/* Consume the next bytes coming from the cache (or from its
+ underlying file if there are remaining unread bytes in the file)
+ until we reach the next end-of-line (or end-of-file). There is no
+ copying from the cache involved. Return TRUE upon successful
+ completion. */
+
+bool
+file_cache_slot::goto_next_line ()
+{
+ char *l;
+ ssize_t len;
+
+ return get_next_line (&l, &len);
+}
+
+/* Read an arbitrary line number LINE_NUM from the file cached in C.
+ If the line was read successfully, *LINE points to the beginning
+ of the line in the file cache and *LINE_LEN is the length of the
+ line. *LINE is not nul-terminated, but may contain zero bytes.
+ *LINE is only valid until the next call of read_line_num.
+ This function returns bool if a line was read. */
+
+bool
+file_cache_slot::read_line_num (size_t line_num,
+ char ** line, ssize_t *line_len)
+{
+ gcc_assert (line_num > 0);
+
+ /* Is the line in the recent line cache?
+ This assumes the main file processing is only using
+ a single contiguous cursor with only temporary excursions. */
+ if (m_line_recent_first != m_line_recent_last
+ && m_line_recent[m_line_recent_first].line_num <= line_num
+ && m_line_recent[m_line_recent_last].line_num >= line_num)
+ {
+ line_info &last = m_line_recent[m_line_recent_last];
+ size_t mask = (1U << recent_cached_lines_shift) - 1;
+ size_t idx = (m_line_recent_last - (last.line_num - line_num)) & mask;
+ line_info &recent = m_line_recent[idx];
+ gcc_assert (recent.line_num == line_num);
+ *line = m_data + recent.start_pos;
+ *line_len = recent.end_pos - recent.start_pos;
+ return true;
+ }
+
+ if (line_num <= m_line_num)
+ {
+ line_info l (line_num, 0, 0);
+ int i = m_line_record.lower_bound (l, line_info::less_than);
+ if (i == 0)
+ {
+ m_line_start_idx = 0;
+ m_line_num = 0;
+ }
+ else if (m_line_record[i - 1].line_num == line_num)
+ {
+ /* We have the start/end of the line. */
+ *line = m_data + m_line_record[i - 1].start_pos;
+ *line_len = m_line_record[i - 1].end_pos - m_line_record[i - 1].start_pos;
+ return true;
+ }
+ else
+ {
+ gcc_assert (m_line_record[i - 1].line_num < m_line_num);
+ m_line_start_idx = m_line_record[i - 1].start_pos;
+ m_line_num = m_line_record[i - 1].line_num - 1;
+ }
+ }
+
+ /* Let's walk from line m_line_num up to line_num - 1, without
+ copying any line. */
+ while (m_line_num < line_num - 1)
+ if (!goto_next_line ())
+ return false;
+
+ /* The line we want is the next one. Let's read it. */
+ return get_next_line (line, line_len);
+}
+
+/* Return the physical source line that corresponds to FILE_PATH/LINE.
+ The line is not nul-terminated. The returned pointer is only
+ valid until the next call of location_get_source_line.
+ Note that the line can contain several null characters,
+ so the returned value's length has the actual length of the line.
+ If the function fails, a NULL char_span is returned. */
+
+char_span
+file_cache::get_source_line (const char *file_path, int line)
+{
+ char *buffer = NULL;
+ ssize_t len;
+
+ if (line == 0)
+ return char_span (NULL, 0);
+
+ if (file_path == NULL)
+ return char_span (NULL, 0);
+
+ file_cache_slot *c = lookup_or_add_file (file_path);
+ if (c == NULL)
+ return char_span (NULL, 0);
+
+ bool read = c->read_line_num (line, &buffer, &len);
+ if (!read)
+ return char_span (NULL, 0);
+
+ return char_span (buffer, len);
+}
+
+char_span
+file_cache::get_source_file_content (const char *file_path)
+{
+ file_cache_slot *c = lookup_or_add_file (file_path);
+ if (c == nullptr)
+ return char_span (nullptr, 0);
+ return c->get_full_file_content ();
+}
+
+#if CHECKING_P
+
+namespace selftest {
+
+ using temp_source_file = ::selftest::temp_source_file;
+
+/* Verify reading of a specific line LINENUM in TMP, FC. */
+
+static void
+check_line (temp_source_file &tmp, file_cache &fc, int linenum)
+{
+ char_span line = fc.get_source_line (tmp.get_filename (), linenum);
+ int n;
+ const char *b = line.get_buffer ();
+ size_t l = line.length ();
+ char buf[5];
+ ASSERT_LT (l, 5);
+ memcpy (buf, b, l);
+ buf[l] = '\0';
+ ASSERT_TRUE (sscanf (buf, "%d", &n) == 1);
+ ASSERT_EQ (n, linenum);
+}
+
+/* Test file cache replacement. */
+
+static void
+test_replacement ()
+{
+ const int maxline = 1000;
+
+ char *vec = XNEWVEC (char, maxline * 5);
+ char *p = vec;
+ int i;
+ for (i = 1; i <= maxline; i++)
+ p += sprintf (p, "%d\n", i);
+
+ temp_source_file tmp (SELFTEST_LOCATION, ".txt", vec);
+ free (vec);
+ file_cache fc;
+
+ for (i = 2; i <= maxline; i++)
+ {
+ check_line (tmp, fc, i);
+ check_line (tmp, fc, i - 1);
+ if (i >= 10)
+ check_line (tmp, fc, i - 9);
+ if (i >= 350) /* Exceed the look behind cache. */
+ check_line (tmp, fc, i - 300);
+ }
+ for (i = 5; i <= maxline; i += 100)
+ check_line (tmp, fc, i);
+ for (i = 1; i <= maxline; i++)
+ check_line (tmp, fc, i);
+}
+
+/* Verify reading of input files (e.g. for caret-based diagnostics). */
+
+static void
+test_reading_source_line ()
+{
+ /* Create a tempfile and write some text to it. */
+ temp_source_file tmp (SELFTEST_LOCATION, ".txt",
+ "01234567890123456789\n"
+ "This is the test text\n"
+ "This is the 3rd line");
+ file_cache fc;
+
+ /* Read back a specific line from the tempfile. */
+ char_span source_line = fc.get_source_line (tmp.get_filename (), 3);
+ ASSERT_TRUE (source_line);
+ ASSERT_TRUE (source_line.get_buffer () != NULL);
+ ASSERT_EQ (20, source_line.length ());
+ ASSERT_TRUE (!strncmp ("This is the 3rd line",
+ source_line.get_buffer (), source_line.length ()));
+
+ source_line = fc.get_source_line (tmp.get_filename (), 2);
+ ASSERT_TRUE (source_line);
+ ASSERT_TRUE (source_line.get_buffer () != NULL);
+ ASSERT_EQ (21, source_line.length ());
+ ASSERT_TRUE (!strncmp ("This is the test text",
+ source_line.get_buffer (), source_line.length ()));
+
+ source_line = fc.get_source_line (tmp.get_filename (), 4);
+ ASSERT_FALSE (source_line);
+ ASSERT_TRUE (source_line.get_buffer () == NULL);
+}
+
+/* Verify reading from buffers (e.g. for sarif-replay). */
+
+static void
+test_reading_source_buffer ()
+{
+ const char *text = ("01234567890123456789\n"
+ "This is the test text\n"
+ "This is the 3rd line");
+ const char *filename = "foo.txt";
+ file_cache fc;
+ fc.add_buffered_content (filename, text, strlen (text));
+
+ /* Read back a specific line from the tempfile. */
+ char_span source_line = fc.get_source_line (filename, 3);
+ ASSERT_TRUE (source_line);
+ ASSERT_TRUE (source_line.get_buffer () != NULL);
+ ASSERT_EQ (20, source_line.length ());
+ ASSERT_TRUE (!strncmp ("This is the 3rd line",
+ source_line.get_buffer (), source_line.length ()));
+
+ source_line = fc.get_source_line (filename, 2);
+ ASSERT_TRUE (source_line);
+ ASSERT_TRUE (source_line.get_buffer () != NULL);
+ ASSERT_EQ (21, source_line.length ());
+ ASSERT_TRUE (!strncmp ("This is the test text",
+ source_line.get_buffer (), source_line.length ()));
+
+ source_line = fc.get_source_line (filename, 4);
+ ASSERT_FALSE (source_line);
+ ASSERT_TRUE (source_line.get_buffer () == NULL);
+}
+
+/* Run all of the selftests within this file. */
+
+void
+file_cache_cc_tests ()
+{
+ test_reading_source_line ();
+ test_reading_source_buffer ();
+ test_replacement ();
+}
+
+} // namespace selftest
+
+#endif /* CHECKING_P */
+
+} // namespace diagnostics