aboutsummaryrefslogtreecommitdiff
path: root/libcpp
diff options
context:
space:
mode:
Diffstat (limited to 'libcpp')
-rw-r--r--libcpp/charset.c63
-rw-r--r--libcpp/errors.c82
-rw-r--r--libcpp/include/cpplib.h76
-rw-r--r--libcpp/include/line-map.h13
-rw-r--r--libcpp/internal.h23
-rw-r--r--libcpp/lex.c38
-rw-r--r--libcpp/line-map.c3
7 files changed, 246 insertions, 52 deletions
diff --git a/libcpp/charset.c b/libcpp/charset.c
index e4e45f6..0b0ccc6 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -1582,12 +1582,14 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
"unknown escape sequence: '\\%c'", (int) c);
else
{
+ encoding_rich_location rich_loc (pfile);
+
/* diagnostic.c does not support "%03o". When it does, this
code can use %03o directly in the diagnostic again. */
char buf[32];
sprintf(buf, "%03o", (int) c);
- cpp_error (pfile, CPP_DL_PEDWARN,
- "unknown escape sequence: '\\%s'", buf);
+ cpp_error_at (pfile, CPP_DL_PEDWARN, &rich_loc,
+ "unknown escape sequence: '\\%s'", buf);
}
}
@@ -2345,14 +2347,16 @@ cpp_string_location_reader::get_next ()
}
cpp_display_width_computation::
-cpp_display_width_computation (const char *data, int data_length, int tabstop) :
+cpp_display_width_computation (const char *data, int data_length,
+ const cpp_char_column_policy &policy) :
m_begin (data),
m_next (m_begin),
m_bytes_left (data_length),
- m_tabstop (tabstop),
+ m_policy (policy),
m_display_cols (0)
{
- gcc_assert (m_tabstop > 0);
+ gcc_assert (policy.m_tabstop > 0);
+ gcc_assert (policy.m_width_cb);
}
@@ -2364,19 +2368,28 @@ cpp_display_width_computation (const char *data, int data_length, int tabstop) :
point to a valid UTF-8-encoded sequence, then it will be treated as a single
byte with display width m_policy.m_undecoded_byte_width. m_display_cols is
the current display column, relative to which tab stops should be expanded. Returns the display width of
- the codepoint just processed. */
+ the codepoint just processed.
+ If OUT is non-NULL, it is populated. */
int
-cpp_display_width_computation::process_next_codepoint ()
+cpp_display_width_computation::process_next_codepoint (cpp_decoded_char *out)
{
cppchar_t c;
int next_width;
+ if (out)
+ out->m_start_byte = m_next;
+
if (*m_next == '\t')
{
++m_next;
--m_bytes_left;
- next_width = m_tabstop - (m_display_cols % m_tabstop);
+ next_width = m_policy.m_tabstop - (m_display_cols % m_policy.m_tabstop);
+ if (out)
+ {
+ out->m_ch = '\t';
+ out->m_valid_ch = true;
+ }
}
else if (one_utf8_to_cppchar ((const uchar **) &m_next, &m_bytes_left, &c)
!= 0)
@@ -2386,14 +2399,24 @@ cpp_display_width_computation::process_next_codepoint ()
of one. */
++m_next;
--m_bytes_left;
- next_width = 1;
+ next_width = m_policy.m_undecoded_byte_width;
+ if (out)
+ out->m_valid_ch = false;
}
else
{
/* one_utf8_to_cppchar() has updated m_next and m_bytes_left for us. */
- next_width = cpp_wcwidth (c);
+ next_width = m_policy.m_width_cb (c);
+ if (out)
+ {
+ out->m_ch = c;
+ out->m_valid_ch = true;
+ }
}
+ if (out)
+ out->m_next_byte = m_next;
+
m_display_cols += next_width;
return next_width;
}
@@ -2409,7 +2432,7 @@ cpp_display_width_computation::advance_display_cols (int n)
const int start = m_display_cols;
const int target = start + n;
while (m_display_cols < target && !done ())
- process_next_codepoint ();
+ process_next_codepoint (NULL);
return m_display_cols - start;
}
@@ -2417,29 +2440,33 @@ cpp_display_width_computation::advance_display_cols (int n)
how many display columns are occupied by the first COLUMN bytes. COLUMN
may exceed DATA_LENGTH, in which case the phantom bytes at the end are
treated as if they have display width 1. Tabs are expanded to the next tab
- stop, relative to the start of DATA. */
+ stop, relative to the start of DATA, and non-printable-ASCII characters
+ will be escaped as per POLICY. */
int
cpp_byte_column_to_display_column (const char *data, int data_length,
- int column, int tabstop)
+ int column,
+ const cpp_char_column_policy &policy)
{
const int offset = MAX (0, column - data_length);
- cpp_display_width_computation dw (data, column - offset, tabstop);
+ cpp_display_width_computation dw (data, column - offset, policy);
while (!dw.done ())
- dw.process_next_codepoint ();
+ dw.process_next_codepoint (NULL);
return dw.display_cols_processed () + offset;
}
/* For the string of length DATA_LENGTH bytes that begins at DATA, compute
the least number of bytes that will result in at least DISPLAY_COL display
columns. The return value may exceed DATA_LENGTH if the entire string does
- not occupy enough display columns. */
+ not occupy enough display columns. Non-printable-ASCII characters
+ will be escaped as per POLICY. */
int
cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col, int tabstop)
+ int display_col,
+ const cpp_char_column_policy &policy)
{
- cpp_display_width_computation dw (data, data_length, tabstop);
+ cpp_display_width_computation dw (data, data_length, policy);
const int avail_display = dw.advance_display_cols (display_col);
return dw.bytes_processed () + MAX (0, display_col - avail_display);
}
diff --git a/libcpp/errors.c b/libcpp/errors.c
index 5e1bf33..f34334a 100644
--- a/libcpp/errors.c
+++ b/libcpp/errors.c
@@ -27,6 +27,31 @@ along with this program; see the file COPYING3. If not see
#include "cpplib.h"
#include "internal.h"
+/* Get a location_t for the current location in PFILE,
+ generally that of the previously lexed token.
+ When the traditional option is set, no token is consulted: the
+ result is PFILE->directive_line while inside a directive, and the
+ line table's highest_line otherwise. In the non-traditional case
+ the result is the src_loc of the token just before PFILE->cur_token,
+ or 0 when cur_token sits at the base of the current token run and
+ therefore has no predecessor within that run. */
+
+location_t
+cpp_diagnostic_get_current_location (cpp_reader *pfile)
+{
+ if (CPP_OPTION (pfile, traditional))
+ {
+ if (pfile->state.in_directive)
+ return pfile->directive_line;
+ else
+ return pfile->line_table->highest_line;
+ }
+ /* We don't want to refer to a token before the beginning of the
+ current run -- that is invalid. */
+ else if (pfile->cur_token == pfile->cur_run->base)
+ {
+ return 0;
+ }
+ else
+ {
+ return pfile->cur_token[-1].src_loc;
+ }
+}
+
/* Print a diagnostic at the given location. */
ATTRIBUTE_FPTR_PRINTF(5,0)
@@ -52,25 +77,7 @@ cpp_diagnostic (cpp_reader * pfile, enum cpp_diagnostic_level level,
enum cpp_warning_reason reason,
const char *msgid, va_list *ap)
{
- location_t src_loc;
-
- if (CPP_OPTION (pfile, traditional))
- {
- if (pfile->state.in_directive)
- src_loc = pfile->directive_line;
- else
- src_loc = pfile->line_table->highest_line;
- }
- /* We don't want to refer to a token before the beginning of the
- current run -- that is invalid. */
- else if (pfile->cur_token == pfile->cur_run->base)
- {
- src_loc = 0;
- }
- else
- {
- src_loc = pfile->cur_token[-1].src_loc;
- }
+ location_t src_loc = cpp_diagnostic_get_current_location (pfile);
rich_location richloc (pfile->line_table, src_loc);
return cpp_diagnostic_at (pfile, level, reason, &richloc, msgid, ap);
}
@@ -144,6 +151,43 @@ cpp_warning_syshdr (cpp_reader * pfile, enum cpp_warning_reason reason,
return ret;
}
+/* As cpp_warning above, but use RICHLOC as the location of the diagnostic.
+ MSGID and its printf-style arguments are forwarded to cpp_diagnostic_at
+ at level CPP_DL_WARNING with REASON; that call's result is returned. */
+
+bool
+cpp_warning_at (cpp_reader *pfile, enum cpp_warning_reason reason,
+ rich_location *richloc, const char *msgid, ...)
+{
+ va_list ap;
+ bool ret;
+
+ va_start (ap, msgid);
+
+ ret = cpp_diagnostic_at (pfile, CPP_DL_WARNING, reason, richloc,
+ msgid, &ap);
+
+ va_end (ap);
+ return ret;
+}
+
+/* As cpp_pedwarning above, but use RICHLOC as the location of the
+ diagnostic. MSGID and its variadic arguments are forwarded to
+ cpp_diagnostic_at at level CPP_DL_PEDWARN with REASON, and whatever
+ that call returns is returned to the caller. */
+
+bool
+cpp_pedwarning_at (cpp_reader * pfile, enum cpp_warning_reason reason,
+ rich_location *richloc, const char *msgid, ...)
+{
+ va_list ap;
+ bool ret;
+
+ va_start (ap, msgid);
+
+ ret = cpp_diagnostic_at (pfile, CPP_DL_PEDWARN, reason, richloc,
+ msgid, &ap);
+
+ va_end (ap);
+ return ret;
+}
+
/* Print a diagnostic at a specific location. */
ATTRIBUTE_FPTR_PRINTF(6,0)
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 56b07ac..176f8c5 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -1268,6 +1268,14 @@ extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason,
const char *msgid, ...)
ATTRIBUTE_PRINTF_3;
+/* As their counterparts above, but use RICHLOC. */
+extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason,
+ rich_location *richloc, const char *msgid, ...)
+ ATTRIBUTE_PRINTF_4;
+extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason,
+ rich_location *richloc, const char *msgid, ...)
+ ATTRIBUTE_PRINTF_4;
+
/* Output a diagnostic with "MSGID: " preceding the
error string of errno. No location is printed. */
extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level,
@@ -1442,43 +1450,95 @@ extern const char * cpp_get_userdef_suffix
/* In charset.c */
+/* The result of attempting to decode a run of UTF-8 bytes.
+ Filled in by cpp_display_width_computation::process_next_codepoint
+ when it is given a non-NULL output pointer. */
+
+struct cpp_decoded_char
+{
+ const char *m_start_byte; /* Read position before this step consumed anything. */
+ const char *m_next_byte; /* Read position after this step; the next step starts here. */
+
+ bool m_valid_ch; /* True for a tab or a successfully decoded character; false for an undecodable byte. */
+ cppchar_t m_ch; /* The decoded character; only written when m_valid_ch is true. */
+};
+
+/* Information for mapping between code points and display columns.
+
+ This is a tabstop value, along with a callback for getting the
+ widths of characters. Normally this callback is cpp_wcwidth, but we
+ support other schemes for escaping non-ASCII Unicode as a series of
+ ASCII chars when printing the user's source code in diagnostic-show-locus.c.
+
+ For example, consider:
+ - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80)
+ - the Unicode character U+1F642 "SLIGHTLY SMILING FACE"
+ (UTF-8: 0xF0 0x9F 0x99 0x82)
+ - the byte 0xBF (a stray trailing byte of a UTF-8 character)
+ Normally U+03C0 would occupy one display column, U+1F642
+ would occupy two display columns, and the stray byte would be
+ printed verbatim as one display column.
+
+ However, when escaping them as Unicode code points as "<U+03C0>"
+ and "<U+1F642>" they occupy 8 and 9 display columns respectively,
+ and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>"
+ they occupy 8 and 16 display columns respectively. In both cases
+ the stray byte is escaped as "<BF>", occupying 4 display columns. */
+
+struct cpp_char_column_policy
+{
+ cpp_char_column_policy (int tabstop,
+ int (*width_cb) (cppchar_t c))
+ : m_tabstop (tabstop),
+ m_undecoded_byte_width (1), /* Not a constructor argument; starts out as 1. */
+ m_width_cb (width_cb)
+ {}
+
+ int m_tabstop; /* Tab stop interval in display columns; cpp_display_width_computation asserts it is > 0. */
+ /* Width in display columns of a stray byte that isn't decodable
+ as UTF-8. */
+ int m_undecoded_byte_width;
+ int (*m_width_cb) (cppchar_t c); /* Gives the display width of a decoded character; cpp_display_width_computation asserts it is non-null. */
+};
+
/* A class to manage the state while converting a UTF-8 sequence to cppchar_t
and computing the display width one character at a time. */
class cpp_display_width_computation {
public:
cpp_display_width_computation (const char *data, int data_length,
- int tabstop);
+ const cpp_char_column_policy &policy);
const char *next_byte () const { return m_next; }
int bytes_processed () const { return m_next - m_begin; }
int bytes_left () const { return m_bytes_left; }
bool done () const { return !bytes_left (); }
int display_cols_processed () const { return m_display_cols; }
- int process_next_codepoint ();
+ int process_next_codepoint (cpp_decoded_char *out);
int advance_display_cols (int n);
private:
const char *const m_begin;
const char *m_next;
size_t m_bytes_left;
- const int m_tabstop;
+ const cpp_char_column_policy &m_policy;
int m_display_cols;
};
/* Convenience functions that are simple use cases for class
cpp_display_width_computation. Tab characters will be expanded to spaces
- as determined by TABSTOP. */
+ as determined by POLICY.m_tabstop, and non-printable-ASCII characters
+ will be escaped as per POLICY. */
int cpp_byte_column_to_display_column (const char *data, int data_length,
- int column, int tabstop);
+ int column,
+ const cpp_char_column_policy &policy);
inline int cpp_display_width (const char *data, int data_length,
- int tabstop)
+ const cpp_char_column_policy &policy)
{
return cpp_byte_column_to_display_column (data, data_length, data_length,
- tabstop);
+ policy);
}
int cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col, int tabstop);
+ int display_col,
+ const cpp_char_column_policy &policy);
int cpp_wcwidth (cppchar_t c);
bool cpp_input_conversion_is_trivial (const char *input_charset);
diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index 464494b..8b5e2f8 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -1787,6 +1787,18 @@ class rich_location
const diagnostic_path *get_path () const { return m_path; }
void set_path (const diagnostic_path *path) { m_path = path; }
+ /* A flag for hinting that the diagnostic involves character encoding
+ issues, and thus that it will be helpful to the user if we show some
+ representation of how the characters in the pertinent source lines
+ are encoded.
+ The default is false (i.e. do not escape).
+ When set to true, non-ASCII bytes in the pertinent source lines will
+ be escaped in a manner controlled by the user-supplied option
+ -fdiagnostics-escape-format=, so that the user can better understand
+ what's going on with the encoding in their source file. */
+ bool escape_on_output_p () const { return m_escape_on_output; }
+ void set_escape_on_output (bool flag) { m_escape_on_output = flag; }
+
private:
bool reject_impossible_fixit (location_t where);
void stop_supporting_fixits ();
@@ -1813,6 +1825,7 @@ protected:
bool m_fixits_cannot_be_auto_applied;
const diagnostic_path *m_path;
+ bool m_escape_on_output;
};
/* A struct for the result of range_label::get_text: a NUL-terminated buffer
diff --git a/libcpp/internal.h b/libcpp/internal.h
index fd44de6..8577cab 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -769,6 +769,9 @@ extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
extern void _cpp_pop_buffer (cpp_reader *);
extern char *_cpp_bracket_include (cpp_reader *);
+/* In errors.c */
+extern location_t cpp_diagnostic_get_current_location (cpp_reader *);
+
/* In traditional.c. */
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *, bool);
extern bool _cpp_read_logical_line_trad (cpp_reader *);
@@ -935,6 +938,26 @@ int linemap_get_expansion_line (class line_maps *,
const char* linemap_get_expansion_filename (class line_maps *,
location_t);
+/* A subclass of rich_location for emitting a diagnostic
+ at the current location of the reader, but flagging
+ it with set_escape_on_output (true).
+ The one-argument constructor takes its location from
+ cpp_diagnostic_get_current_location (PFILE); the two-argument
+ constructor uses the caller-supplied LOC instead. Both pass
+ PFILE->line_table to the rich_location base. */
+class encoding_rich_location : public rich_location
+{
+ public:
+ encoding_rich_location (cpp_reader *pfile)
+ : rich_location (pfile->line_table,
+ cpp_diagnostic_get_current_location (pfile))
+ {
+ set_escape_on_output (true);
+ }
+
+ encoding_rich_location (cpp_reader *pfile, location_t loc)
+ : rich_location (pfile->line_table, loc)
+ {
+ set_escape_on_output (true);
+ }
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 8e3ef09..fa2253d 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1268,7 +1268,11 @@ skip_whitespace (cpp_reader *pfile, cppchar_t c)
while (is_nvspace (c));
if (saw_NUL)
- cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
+ {
+ encoding_rich_location rich_loc (pfile);
+ cpp_error_at (pfile, CPP_DL_WARNING, &rich_loc,
+ "null character(s) ignored");
+ }
buffer->cur--;
}
@@ -1297,6 +1301,28 @@ warn_about_normalization (cpp_reader *pfile,
if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
&& !pfile->state.skipping)
{
+ location_t loc = token->src_loc;
+
+ /* If possible, create a location range for the token. */
+ if (loc >= RESERVED_LOCATION_COUNT
+ && token->type != CPP_EOF
+ /* There must be no line notes to process. */
+ && (!(pfile->buffer->cur
+ >= pfile->buffer->notes[pfile->buffer->cur_note].pos
+ && !pfile->overlaid_buffer)))
+ {
+ source_range tok_range;
+ tok_range.m_start = loc;
+ tok_range.m_finish
+ = linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (pfile->buffer,
+ pfile->buffer->cur));
+ loc = COMBINE_LOCATION_DATA (pfile->line_table,
+ loc, tok_range, NULL);
+ }
+
+ encoding_rich_location rich_loc (pfile, loc);
+
/* Make sure that the token is printed using UCNs, even
if we'd otherwise happily print UTF-8. */
unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
@@ -1304,14 +1330,14 @@ warn_about_normalization (cpp_reader *pfile,
sz = cpp_spell_token (pfile, token, buf, false) - buf;
if (NORMALIZE_STATE_RESULT (s) == normalized_C)
- cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
- "`%.*s' is not in NFKC", (int) sz, buf);
+ cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
+ "`%.*s' is not in NFKC", (int) sz, buf);
else if (CPP_OPTION (pfile, cxx23_identifiers))
- cpp_pedwarning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
+ cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
"`%.*s' is not in NFC", (int) sz, buf);
else
- cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
- "`%.*s' is not in NFC", (int) sz, buf);
+ cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
+ "`%.*s' is not in NFC", (int) sz, buf);
free (buf);
}
}
diff --git a/libcpp/line-map.c b/libcpp/line-map.c
index 1a6902a..1957dd7 100644
--- a/libcpp/line-map.c
+++ b/libcpp/line-map.c
@@ -2086,7 +2086,8 @@ rich_location::rich_location (line_maps *set, location_t loc,
m_fixit_hints (),
m_seen_impossible_fixit (false),
m_fixits_cannot_be_auto_applied (false),
- m_path (NULL)
+ m_path (NULL),
+ m_escape_on_output (false)
{
add_range (loc, SHOW_RANGE_WITH_CARET, label);
}