aboutsummaryrefslogtreecommitdiff
path: root/gcc/input.cc
diff options
context:
space:
mode:
author: Lewis Hyatt <lhyatt@gmail.com> 2022-07-07 13:59:27 -0400
committer: Lewis Hyatt <lhyatt@gmail.com> 2022-07-08 09:43:33 -0400
commit: 2bd15617e73acf76207127e5580cd10b1fab91a5 (patch)
tree: 35be9178e2ae2b13bc5a7c6bf4ff695c2f2f565e /gcc/input.cc
parent: 6da7f7c5ac03f85a753989712588511e5f56e73d (diff)
downloadgcc-2bd15617e73acf76207127e5580cd10b1fab91a5.zip
gcc-2bd15617e73acf76207127e5580cd10b1fab91a5.tar.gz
gcc-2bd15617e73acf76207127e5580cd10b1fab91a5.tar.bz2
diagnostics: Make line-ending logic consistent with libcpp [PR91733]
libcpp recognizes a lone \r as a valid line ending, so the infrastructure for
retrieving source lines to be output in diagnostics needs to do the same. This
patch fixes file_cache_slot::get_next_line() accordingly so that diagnostics
display the correct part of the source when \r line endings are in use.

gcc/ChangeLog:

	PR preprocessor/91733
	* input.cc (find_end_of_line): New helper function.
	(file_cache_slot::get_next_line): Recognize \r as a line ending.
	* diagnostic-show-locus.cc (test_escaping_bytes_1): Adapt selftest
	since \r will now be interpreted as a line-ending.

gcc/testsuite/ChangeLog:

	PR preprocessor/91733
	* c-c++-common/pr91733.c: New test.
Diffstat (limited to 'gcc/input.cc')
-rw-r--r--gcc/input.cc67
1 files changed, 52 insertions, 15 deletions
diff --git a/gcc/input.cc b/gcc/input.cc
index 2acbfde..060ca16 100644
--- a/gcc/input.cc
+++ b/gcc/input.cc
@@ -646,6 +646,37 @@ file_cache_slot::maybe_read_data ()
return read_data ();
}
+/* Helper function for file_cache_slot::get_next_line (): find the end of the
+ next line within the LEN bytes at S. Follows the memchr convention: returns
+ nullptr if no terminator is found, else a pointer to the terminator's last
+ byte (the \n of \r\n). Like libcpp, any of \n, \r\n, or \r ends a line. */
+
+static char *
+find_end_of_line (char *s, size_t len)
+{
+ for (const auto end = s + len; s != end; ++s)
+ {
+ if (*s == '\n')
+ return s;
+ if (*s == '\r')
+ {
+ const auto next = s + 1;
+ if (next == end)
+ {
+ /* Don't find the line ending if \r is the very last character
+ in the buffer; we do not know if it's the end of the file or
+ just the end of what has been read so far, and we wouldn't
+ want to break in the middle of what's actually a \r\n
+ sequence. Instead, the caller handles the case of a file
+ ending in a \r once all of the file has been read. */
+ break;
+ }
+ return (*next == '\n' ? next : s);
+ }
+ }
+ return nullptr;
+}
+
/* Read a new line from file FP, using C as a cache for the data
coming from the file. Upon successful completion, *LINE is set to
the beginning of the line found. *LINE points directly in the
@@ -671,17 +702,16 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len)
char *next_line_start = NULL;
size_t len = 0;
- char *line_end = (char *) memchr (line_start, '\n', remaining_size);
+ char *line_end = find_end_of_line (line_start, remaining_size);
if (line_end == NULL)
{
- /* We haven't found the end-of-line delimiter in the cache.
- Fill the cache with more data from the file and look for the
- '\n'. */
+ /* We haven't found an end-of-line delimiter in the cache.
+ Fill the cache with more data from the file and look again. */
while (maybe_read_data ())
{
line_start = m_data + m_line_start_idx;
remaining_size = m_nb_read - m_line_start_idx;
- line_end = (char *) memchr (line_start, '\n', remaining_size);
+ line_end = find_end_of_line (line_start, remaining_size);
if (line_end != NULL)
{
next_line_start = line_end + 1;
@@ -690,14 +720,22 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len)
}
if (line_end == NULL)
{
- /* We've loadded all the file into the cache and still no
- '\n'. Let's say the line ends up at one byte passed the
+ /* We've loaded all the file into the cache and still no
+ terminator. Let's say the line ends up at one byte past the
end of the file. This is to stay consistent with the case
- of when the line ends up with a '\n' and line_end points to
- that terminal '\n'. That consistency is useful below in
- the len calculation. */
- line_end = m_data + m_nb_read ;
- m_missing_trailing_newline = true;
+ of when the line ends up with a terminator and line_end points to
+ that. That consistency is useful below in the len calculation.
+
+ If the file ends in a \r, we didn't identify it as a line
+ terminator above, so do that now instead. */
+ line_end = m_data + m_nb_read;
+ if (m_nb_read && line_end[-1] == '\r')
+ {
+ --line_end;
+ m_missing_trailing_newline = false;
+ }
+ else
+ m_missing_trailing_newline = true;
}
else
m_missing_trailing_newline = false;
@@ -711,9 +749,8 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len)
if (m_fp && ferror (m_fp))
return false;
- /* At this point, we've found the end of the of line. It either
- points to the '\n' or to one byte after the last byte of the
- file. */
+ /* At this point, we've found the end of the line. It either points to
+ the line terminator or to one byte after the last byte of the file. */
gcc_assert (line_end != NULL);
len = line_end - line_start;